github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/prolly/tree/three_way_differ.go (about) 1 // Copyright 2023 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package tree 16 17 import ( 18 "bytes" 19 "context" 20 "errors" 21 "fmt" 22 "io" 23 24 "github.com/dolthub/go-mysql-server/sql" 25 26 "github.com/dolthub/dolt/go/store/val" 27 ) 28 29 // ThreeWayDiffer is an iterator that gives an increased level of granularity 30 // of diffs between three root values. See diffOp for the classes of diffs. 31 type ThreeWayDiffer[K ~[]byte, O Ordering[K]] struct { 32 lIter, rIter Differ[K, O] 33 resolveCb resolveCb 34 lDiff Diff 35 rDiff Diff 36 lDone bool 37 rDone bool 38 keyless bool 39 leftAndRightSchemasDiffer bool 40 } 41 42 //var _ DiffIter = (*threeWayDiffer[Item, val.TupleDesc])(nil) 43 44 type resolveCb func(*sql.Context, val.Tuple, val.Tuple, val.Tuple) (val.Tuple, bool, error) 45 46 // ThreeWayDiffInfo stores contextual data that can influence the diff. 47 // If |LeftSchemaChange| is true, then the left side's bytes have a different interpretation from the base, 48 // so every row in both Left and Base should be considered a modification, even if they have the same bytes. 49 // If |RightSchemaChange| is true, then the right side's bytes have a different interpretation from the base, 50 // so every row in both Right and Base should be considered a modification, even if they have the same bytes. 51 // Note that these values aren't set for schema changes that have no effect on the meaning of the bytes, 52 // such as collation. 53 // If |LeftAndRightSchemasDiffer| is true, then the left and right sides of the diff have a different interpretation 54 // of their bytes, so there cannot be any convergent edits, even if two rows in Left and Right have the same bytes. 55 type ThreeWayDiffInfo struct { 56 LeftSchemaChange bool 57 RightSchemaChange bool 58 LeftAndRightSchemasDiffer bool 59 } 60 61 func NewThreeWayDiffer[K, V ~[]byte, O Ordering[K]]( 62 ctx context.Context, 63 ns NodeStore, 64 left StaticMap[K, V, O], 65 right StaticMap[K, V, O], 66 base StaticMap[K, V, O], 67 resolveCb resolveCb, 68 keyless bool, 69 diffInfo ThreeWayDiffInfo, 70 order O, 71 ) (*ThreeWayDiffer[K, O], error) { 72 // probably compute each of these separately 73 ld, err := DifferFromRoots[K](ctx, ns, ns, base.Root, left.Root, order, diffInfo.LeftSchemaChange) 74 if err != nil { 75 return nil, err 76 } 77 78 rd, err := DifferFromRoots[K](ctx, ns, ns, base.Root, right.Root, order, diffInfo.RightSchemaChange) 79 if err != nil { 80 return nil, err 81 } 82 83 return &ThreeWayDiffer[K, O]{ 84 lIter: ld, 85 rIter: rd, 86 resolveCb: resolveCb, 87 keyless: keyless, 88 leftAndRightSchemasDiffer: diffInfo.LeftAndRightSchemasDiffer, 89 }, nil 90 } 91 92 type threeWayDiffState uint8 93 94 const ( 95 dsUnknown threeWayDiffState = iota 96 dsInit 97 dsDiffFinalize 98 dsCompare 99 dsNewLeft 100 dsNewRight 101 dsMatch 102 dsMatchFinalize 103 ) 104 105 func (d *ThreeWayDiffer[K, O]) Next(ctx *sql.Context) (ThreeWayDiff, error) { 106 var err error 107 var res ThreeWayDiff 108 nextState := dsInit 109 for { 110 // The regular flow will be: 111 // - dsInit: get the first diff in each iterator if this is the first Next 112 // - dsDiffFinalize: short-circuit comparing if one iterator is exhausted 113 // - dsCompare: compare keys for the leading diffs, to determine whether 114 // the diffs are independent, or require further disambiguation. 115 // - dsNewLeft: an edit was made to the left root value for a key not edited 116 // on the right. 117 // - dsNewRight: ditto above, edit to key only on right. 118 // - dsMatch: edits made to the same key in left and right roots, either 119 // resolve non-overlapping field changes or indicate schema/value conflict. 120 // - dsMatchFinalize: increment both iters after performing match disambiguation. 121 switch nextState { 122 case dsInit: 123 if !d.lDone { 124 if d.lDiff.Key == nil { 125 d.lDiff, err = d.lIter.Next(ctx) 126 if errors.Is(err, io.EOF) { 127 d.lDone = true 128 } else if err != nil { 129 return ThreeWayDiff{}, err 130 } 131 } 132 } 133 if !d.rDone { 134 if d.rDiff.Key == nil { 135 d.rDiff, err = d.rIter.Next(ctx) 136 if errors.Is(err, io.EOF) { 137 d.rDone = true 138 } else if err != nil { 139 return ThreeWayDiff{}, err 140 } 141 } 142 } 143 nextState = dsDiffFinalize 144 case dsDiffFinalize: 145 if d.lDone && d.rDone { 146 return ThreeWayDiff{}, io.EOF 147 } else if d.lDone { 148 nextState = dsNewRight 149 } else if d.rDone { 150 nextState = dsNewLeft 151 } else { 152 nextState = dsCompare 153 } 154 case dsCompare: 155 cmp := d.lIter.order.Compare(K(d.lDiff.Key), K(d.rDiff.Key)) 156 switch { 157 case cmp < 0: 158 nextState = dsNewLeft 159 case cmp == 0: 160 nextState = dsMatch 161 case cmp > 0: 162 nextState = dsNewRight 163 default: 164 } 165 case dsNewLeft: 166 res = d.newLeftEdit(d.lDiff.Key, d.lDiff.To, d.lDiff.Type) 167 d.lDiff, err = d.lIter.Next(ctx) 168 if errors.Is(err, io.EOF) { 169 d.lDone = true 170 } else if err != nil { 171 return ThreeWayDiff{}, err 172 } 173 return res, nil 174 case dsNewRight: 175 res = d.newRightEdit(d.rDiff.Key, d.rDiff.From, d.rDiff.To, d.rDiff.Type) 176 d.rDiff, err = d.rIter.Next(ctx) 177 if errors.Is(err, io.EOF) { 178 d.rDone = true 179 } else if err != nil { 180 return ThreeWayDiff{}, err 181 } 182 return res, nil 183 case dsMatch: 184 if d.lDiff.To == nil && d.rDiff.To == nil { 185 res = d.newConvergentEdit(d.lDiff.Key, d.lDiff.To, d.lDiff.Type) 186 } else if d.lDiff.To == nil || d.rDiff.To == nil { 187 // Divergent delete. Attempt to resolve. 188 _, ok, err := d.resolveCb(ctx, val.Tuple(d.lDiff.To), val.Tuple(d.rDiff.To), val.Tuple(d.lDiff.From)) 189 if err != nil { 190 return ThreeWayDiff{}, err 191 } 192 if !ok { 193 res = d.newDivergentDeleteConflict(d.lDiff.Key, d.lDiff.From, d.lDiff.To, d.rDiff.To) 194 } else { 195 res = d.newDivergentDeleteResolved(d.lDiff.Key, d.lDiff.From, d.lDiff.To, d.rDiff.To) 196 } 197 } else if d.lDiff.Type == d.rDiff.Type && bytes.Equal(d.lDiff.To, d.rDiff.To) { 198 res = d.newConvergentEdit(d.lDiff.Key, d.lDiff.To, d.lDiff.Type) 199 } else { 200 resolved, ok, err := d.resolveCb(ctx, val.Tuple(d.lDiff.To), val.Tuple(d.rDiff.To), val.Tuple(d.lDiff.From)) 201 if err != nil { 202 return ThreeWayDiff{}, err 203 } 204 if !ok { 205 res = d.newDivergentClashConflict(d.lDiff.Key, d.lDiff.From, d.lDiff.To, d.rDiff.To) 206 } else { 207 res = d.newDivergentResolved(d.lDiff.Key, d.lDiff.To, d.rDiff.To, Item(resolved)) 208 } 209 } 210 nextState = dsMatchFinalize 211 case dsMatchFinalize: 212 d.lDiff, err = d.lIter.Next(ctx) 213 if errors.Is(err, io.EOF) { 214 d.lDone = true 215 } else if err != nil { 216 return ThreeWayDiff{}, err 217 } 218 219 d.rDiff, err = d.rIter.Next(ctx) 220 if errors.Is(err, io.EOF) { 221 d.rDone = true 222 } else if err != nil { 223 return ThreeWayDiff{}, err 224 } 225 226 return res, nil 227 default: 228 panic(fmt.Sprintf("unknown threeWayDiffState: %d", nextState)) 229 } 230 } 231 } 232 233 func (d *ThreeWayDiffer[K, O]) Close() error { 234 return nil 235 } 236 237 //go:generate stringer -type=diffOp -linecomment 238 239 type DiffOp uint16 240 241 const ( 242 DiffOpLeftAdd DiffOp = iota // leftAdd 243 DiffOpRightAdd // rightAdd 244 DiffOpLeftDelete //leftDelete 245 DiffOpRightDelete //rightDelete 246 DiffOpLeftModify //leftModify 247 DiffOpRightModify //rightModify 248 DiffOpConvergentAdd //convergentAdd 249 DiffOpConvergentDelete //convergentDelete 250 DiffOpConvergentModify //convergentModify 251 DiffOpDivergentModifyResolved //divergentModifyResolved 252 DiffOpDivergentDeleteConflict //divergentDeleteConflict 253 DiffOpDivergentModifyConflict //divergentModifyConflict 254 DiffOpDivergentDeleteResolved //divergentDeleteConflict 255 ) 256 257 // ThreeWayDiff is a generic object for encoding a three way diff. 258 type ThreeWayDiff struct { 259 // Op indicates the type of diff 260 Op DiffOp 261 // a partial set of tuple values are set 262 // depending on the diffOp 263 Key, Base, Left, Right, Merged val.Tuple 264 } 265 266 func (d *ThreeWayDiffer[K, O]) newLeftEdit(key, left Item, typ DiffType) ThreeWayDiff { 267 var op DiffOp 268 switch typ { 269 case AddedDiff: 270 op = DiffOpLeftAdd 271 case ModifiedDiff: 272 op = DiffOpLeftModify 273 case RemovedDiff: 274 op = DiffOpLeftDelete 275 default: 276 panic("unknown diff type") 277 } 278 return ThreeWayDiff{ 279 Op: op, 280 Key: val.Tuple(key), 281 Left: val.Tuple(left), 282 } 283 } 284 285 func (d *ThreeWayDiffer[K, O]) newRightEdit(key, base, right Item, typ DiffType) ThreeWayDiff { 286 var op DiffOp 287 switch typ { 288 case AddedDiff: 289 op = DiffOpRightAdd 290 case ModifiedDiff: 291 op = DiffOpRightModify 292 case RemovedDiff: 293 op = DiffOpRightDelete 294 default: 295 panic("unknown diff type") 296 } 297 return ThreeWayDiff{ 298 Op: op, 299 Key: val.Tuple(key), 300 Base: val.Tuple(base), 301 Right: val.Tuple(right), 302 } 303 } 304 305 func (d *ThreeWayDiffer[K, O]) newConvergentEdit(key, left Item, typ DiffType) ThreeWayDiff { 306 var op DiffOp 307 switch typ { 308 case AddedDiff: 309 op = DiffOpConvergentAdd 310 case ModifiedDiff: 311 op = DiffOpConvergentModify 312 case RemovedDiff: 313 op = DiffOpConvergentDelete 314 default: 315 panic("unknown diff type") 316 } 317 return ThreeWayDiff{ 318 Op: op, 319 Key: val.Tuple(key), 320 Left: val.Tuple(left), 321 } 322 } 323 324 func (d *ThreeWayDiffer[K, O]) newDivergentResolved(key, left, right, merged Item) ThreeWayDiff { 325 return ThreeWayDiff{ 326 Op: DiffOpDivergentModifyResolved, 327 Key: val.Tuple(key), 328 Left: val.Tuple(left), 329 Right: val.Tuple(right), 330 Merged: val.Tuple(merged), 331 } 332 } 333 334 func (d *ThreeWayDiffer[K, O]) newDivergentDeleteConflict(key, base, left, right Item) ThreeWayDiff { 335 return ThreeWayDiff{ 336 Op: DiffOpDivergentDeleteConflict, 337 Key: val.Tuple(key), 338 Base: val.Tuple(base), 339 Left: val.Tuple(left), 340 Right: val.Tuple(right), 341 } 342 } 343 344 func (d *ThreeWayDiffer[K, O]) newDivergentDeleteResolved(key, base, left, right Item) ThreeWayDiff { 345 return ThreeWayDiff{ 346 Op: DiffOpDivergentDeleteResolved, 347 Key: val.Tuple(key), 348 Base: val.Tuple(base), 349 Left: val.Tuple(left), 350 Right: val.Tuple(right), 351 } 352 } 353 354 func (d *ThreeWayDiffer[K, O]) newDivergentClashConflict(key, base, left, right Item) ThreeWayDiff { 355 return ThreeWayDiff{ 356 Op: DiffOpDivergentModifyConflict, 357 Key: val.Tuple(key), 358 Base: val.Tuple(base), 359 Left: val.Tuple(left), 360 Right: val.Tuple(right), 361 } 362 }