github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/pkg/graveler/committed/diff.go (about) 1 package committed 2 3 import ( 4 "bytes" 5 "context" 6 "errors" 7 8 "github.com/treeverse/lakefs/pkg/graveler" 9 ) 10 11 type iteratorValue struct { 12 rng *Range 13 record *graveler.ValueRecord 14 err error 15 } 16 17 // ErrNoRange occurs when calling nextRange while not in a range, could happen when the diff is currently comparing keys in two different ranges 18 var ErrNoRange = errors.New("diff is not currently in a range") 19 20 // currentRangeData holds state of the current RangeDiff 21 type currentRangeData struct { 22 iter Iterator 23 value *iteratorValue 24 currentRangeDiff *RangeDiff 25 } 26 27 type diffIterator struct { 28 ctx context.Context 29 left Iterator 30 right Iterator 31 leftValue iteratorValue 32 rightValue iteratorValue 33 currentRange currentRangeData 34 currentDiff *graveler.Diff 35 err error 36 state diffIteratorState 37 } 38 39 // currentRangeLeftIdentity returns the current range identity in case the current range is the left range, otherwise returns nil 40 func (d diffIterator) currentRangeLeftIdentity() (res []byte) { 41 if d.currentRange.iter == d.left { 42 res = make([]byte, len(d.currentRange.value.record.Identity)) 43 copy(res, d.currentRange.value.record.Identity) 44 } 45 return 46 } 47 48 type diffIteratorState int 49 50 const ( 51 diffIteratorStatePreInit diffIteratorState = iota 52 diffIteratorStateOpen 53 diffIteratorStateClosed 54 ) 55 56 type diffIteratorCompareResult int 57 58 const ( 59 diffItCompareResultDone diffIteratorCompareResult = iota 60 diffItCompareResultSameRanges 61 diffItCompareResultSameIdentities 62 diffItCompareResultSameKeys 63 diffItCompareResultNeedStartRangeBoth 64 diffItCompareResultNeedStartRangeLeft 65 diffItCompareResultNeedStartRangeRight 66 diffItCompareResultLeftBeforeRight 67 diffItCompareResultRightBeforeLeft 68 diffItCompareResultRightRangeBeforeLeft 69 diffItCompareResultLeftRangeBeforeRight 70 diffItCompareResultSameBounds 71 ) 72 73 func NewDiffIterator(ctx context.Context, left Iterator, right Iterator) DiffIterator { 74 return &diffIterator{ 75 ctx: ctx, 76 left: left, 77 right: right, 78 } 79 } 80 81 func diffIteratorNextValue(it Iterator) (*graveler.ValueRecord, *Range, error) { 82 if it.Next() { 83 rec, rng := it.Value() 84 return rec, rng, nil 85 } 86 return nil, nil, it.Err() 87 } 88 89 func diffIteratorNextRange(it Iterator) (*graveler.ValueRecord, *Range, error) { 90 if it.NextRange() { 91 val, rng := it.Value() 92 return val, rng, nil 93 } 94 return nil, nil, it.Err() 95 } 96 97 func (d *diffIterator) setCurrentRangeRight() { 98 d.currentRange.iter = d.right 99 d.currentRange.value = &d.rightValue 100 d.currentRange.currentRangeDiff = &RangeDiff{ 101 Type: graveler.DiffTypeAdded, 102 Range: d.rightValue.rng.Copy(), 103 } 104 d.currentDiff = nil 105 } 106 107 func (d *diffIterator) setCurrentRangeBoth() { 108 d.currentRange.iter = nil 109 d.currentRange.value = &d.rightValue 110 d.currentRange.currentRangeDiff = &RangeDiff{ 111 Type: graveler.DiffTypeChanged, 112 Range: d.rightValue.rng.Copy(), 113 LeftIdentity: d.leftValue.rng.ID, 114 } 115 d.currentDiff = nil 116 } 117 118 func (d *diffIterator) setCurrentRangeLeft() { 119 d.currentRange.iter = d.left 120 d.currentRange.value = &d.leftValue 121 d.currentRange.currentRangeDiff = &RangeDiff{ 122 Type: graveler.DiffTypeRemoved, 123 Range: d.leftValue.rng.Copy(), 124 } 125 d.leftValue.record = nil 126 d.currentDiff = nil 127 } 128 129 func (d *diffIterator) clearCurrentRange() { 130 d.currentRange.iter = nil 131 d.currentRange.value = nil 132 d.currentRange.currentRangeDiff = nil 133 } 134 135 func (d *diffIterator) compareDiffKeys() int { 136 if d.leftValue.rng == nil { 137 return 1 138 } 139 if d.rightValue.rng == nil { 140 return -1 141 } 142 return bytes.Compare(getCurrentKey(d.left), getCurrentKey(d.right)) 143 } 144 145 func (d *diffIterator) compareDiffIterators() diffIteratorCompareResult { 146 leftRange := d.leftValue.rng 147 rightRange := d.rightValue.rng 148 if leftRange == nil && rightRange == nil { 149 return diffItCompareResultDone 150 } 151 if leftRange != nil && rightRange != nil && leftRange.ID == rightRange.ID { 152 return diffItCompareResultSameRanges 153 } 154 leftStartRange := leftRange != nil && d.leftValue.record == nil 155 rightStartRange := rightRange != nil && d.rightValue.record == nil 156 leftBeforeRight := leftStartRange && rightRange == nil 157 rightBeforeLeft := rightStartRange && leftRange == nil 158 sameBounds := false 159 if leftStartRange && rightStartRange { 160 leftBeforeRight = bytes.Compare(leftRange.MaxKey, rightRange.MinKey) < 0 161 rightBeforeLeft = bytes.Compare(rightRange.MaxKey, leftRange.MinKey) < 0 162 sameBounds = bytes.Equal(leftRange.MinKey, rightRange.MinKey) && bytes.Equal(leftRange.MaxKey, rightRange.MaxKey) 163 } 164 comp := d.compareDiffKeys() 165 switch { 166 case leftBeforeRight: 167 return diffItCompareResultLeftRangeBeforeRight 168 case rightBeforeLeft: 169 return diffItCompareResultRightRangeBeforeLeft 170 case leftStartRange && rightStartRange && comp == 0 && sameBounds: 171 return diffItCompareResultSameBounds 172 case leftStartRange && rightStartRange && comp == 0: 173 return diffItCompareResultNeedStartRangeBoth 174 case leftStartRange && comp <= 0: 175 return diffItCompareResultNeedStartRangeLeft 176 case rightStartRange && comp >= 0: 177 return diffItCompareResultNeedStartRangeRight 178 case comp == 0 && bytes.Equal(d.leftValue.record.Identity, d.rightValue.record.Identity): 179 return diffItCompareResultSameIdentities 180 case comp == 0: 181 return diffItCompareResultSameKeys 182 case comp < 0: 183 return diffItCompareResultLeftBeforeRight 184 default: 185 return diffItCompareResultRightBeforeLeft 186 } 187 } 188 189 func (d *diffIterator) Next() bool { 190 if d.state == diffIteratorStateClosed || d.err != nil { 191 return false 192 } 193 if d.state == diffIteratorStatePreInit { 194 d.state = diffIteratorStateOpen 195 d.clearCurrentRange() 196 d.leftValue.record, d.leftValue.rng, d.leftValue.err = diffIteratorNextValue(d.left) 197 d.rightValue.record, d.rightValue.rng, d.rightValue.err = diffIteratorNextValue(d.right) 198 } 199 if d.currentRange.iter != nil { 200 // we are currently inside a range 201 d.currentRange.value.record, d.currentRange.value.rng, d.currentRange.value.err = diffIteratorNextValue(d.currentRange.iter) 202 if d.currentRange.value.err != nil { 203 d.err = d.currentRange.value.err 204 d.currentDiff = nil 205 d.clearCurrentRange() 206 return false 207 } 208 if d.currentRange.value.record != nil { 209 leftIdentity := d.currentRangeLeftIdentity() 210 d.currentDiff = &graveler.Diff{Type: d.currentRange.currentRangeDiff.Type, Key: d.currentRange.value.record.Key.Copy(), Value: d.currentRange.value.record.Value, LeftIdentity: leftIdentity} 211 return true 212 } 213 // current diff range over - clear current range and continue to get next range/value 214 d.clearCurrentRange() 215 } 216 select { 217 case <-d.ctx.Done(): 218 d.err = d.ctx.Err() 219 return false 220 default: 221 for { 222 if d.rightValue.err != nil { 223 d.err = d.rightValue.err 224 } 225 if d.leftValue.err != nil { 226 d.err = d.leftValue.err 227 } 228 if d.err != nil { 229 d.currentDiff = nil 230 d.clearCurrentRange() 231 return false 232 } 233 compareResult := d.compareDiffIterators() 234 switch compareResult { 235 case diffItCompareResultDone: 236 d.currentDiff = nil 237 return false 238 case diffItCompareResultSameRanges: 239 d.leftValue.record, d.leftValue.rng, d.leftValue.err = diffIteratorNextRange(d.left) 240 d.rightValue.record, d.rightValue.rng, d.rightValue.err = diffIteratorNextRange(d.right) 241 case diffItCompareResultLeftRangeBeforeRight: 242 d.setCurrentRangeLeft() 243 return true 244 case diffItCompareResultRightRangeBeforeLeft: 245 d.setCurrentRangeRight() 246 return true 247 case diffItCompareResultSameBounds: 248 d.setCurrentRangeBoth() 249 d.state = diffIteratorStatePreInit 250 return true 251 case diffItCompareResultSameKeys: 252 // same keys on different ranges 253 d.currentDiff = &graveler.Diff{Type: graveler.DiffTypeChanged, Key: d.rightValue.record.Key.Copy(), Value: d.rightValue.record.Value, LeftIdentity: d.leftValue.record.Identity} 254 d.leftValue.record, d.leftValue.rng, d.leftValue.err = diffIteratorNextValue(d.left) 255 d.rightValue.record, d.rightValue.rng, d.rightValue.err = diffIteratorNextValue(d.right) 256 return true 257 case diffItCompareResultSameIdentities, diffItCompareResultNeedStartRangeBoth: 258 d.leftValue.record, d.leftValue.rng, d.leftValue.err = diffIteratorNextValue(d.left) 259 d.rightValue.record, d.rightValue.rng, d.rightValue.err = diffIteratorNextValue(d.right) 260 case diffItCompareResultNeedStartRangeLeft: 261 d.leftValue.record, d.leftValue.rng, d.leftValue.err = diffIteratorNextValue(d.left) 262 case diffItCompareResultNeedStartRangeRight: 263 d.rightValue.record, d.rightValue.rng, d.rightValue.err = diffIteratorNextValue(d.right) 264 case diffItCompareResultLeftBeforeRight: 265 // nothing on right, or left before right 266 d.currentDiff = &graveler.Diff{Type: graveler.DiffTypeRemoved, Key: d.leftValue.record.Key.Copy(), Value: d.leftValue.record.Value, LeftIdentity: d.leftValue.record.Identity} 267 d.leftValue.record, d.leftValue.rng, d.leftValue.err = diffIteratorNextValue(d.left) 268 return true 269 case diffItCompareResultRightBeforeLeft: 270 // nothing on left, or right before left 271 d.currentDiff = &graveler.Diff{Type: graveler.DiffTypeAdded, Key: d.rightValue.record.Key.Copy(), Value: d.rightValue.record.Value} 272 d.rightValue.record, d.rightValue.rng, d.rightValue.err = diffIteratorNextValue(d.right) 273 return true 274 } 275 } 276 } 277 } 278 279 func (d *diffIterator) NextRange() bool { 280 if d.currentRange.currentRangeDiff != nil && d.currentRange.currentRangeDiff.Type == graveler.DiffTypeChanged { 281 d.leftValue.record, d.leftValue.rng, d.leftValue.err = diffIteratorNextRange(d.left) 282 d.rightValue.record, d.rightValue.rng, d.rightValue.err = diffIteratorNextRange(d.right) 283 d.clearCurrentRange() 284 d.state = diffIteratorStateOpen 285 return d.Next() 286 } 287 if d.currentRange.iter == nil { 288 d.err = ErrNoRange 289 return false 290 } 291 d.currentRange.value.record, d.currentRange.value.rng, d.currentRange.value.err = diffIteratorNextRange(d.currentRange.iter) 292 d.clearCurrentRange() 293 return d.Next() 294 } 295 296 func (d *diffIterator) SeekGE(id graveler.Key) { 297 d.left.SeekGE(id) 298 d.right.SeekGE(id) 299 d.currentDiff = nil 300 d.leftValue = iteratorValue{} 301 d.rightValue = iteratorValue{} 302 d.err = nil 303 d.state = diffIteratorStatePreInit 304 d.currentRange = currentRangeData{} 305 } 306 307 func (d *diffIterator) Value() (*graveler.Diff, *RangeDiff) { 308 return d.currentDiff, d.currentRange.currentRangeDiff 309 } 310 311 func (d *diffIterator) Err() error { 312 return d.err 313 } 314 315 func (d *diffIterator) Close() { 316 d.left.Close() 317 d.right.Close() 318 d.currentDiff = nil 319 d.err = nil 320 d.state = diffIteratorStateClosed 321 } 322 323 func getCurrentKey(it Iterator) []byte { 324 val, rng := it.Value() 325 if val == nil { 326 return rng.MinKey 327 } 328 return val.Key 329 }