github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/store/diff/diff.go (about) 1 // Copyright 2019 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // 15 // This file incorporates work covered by the following copyright and 16 // permission notice: 17 // 18 // Copyright 2016 Attic Labs, Inc. All rights reserved. 19 // Licensed under the Apache License, version 2.0: 20 // http://www.apache.org/licenses/LICENSE-2.0 21 22 package diff 23 24 import ( 25 "context" 26 "errors" 27 "sync/atomic" 28 29 "golang.org/x/sync/errgroup" 30 31 "github.com/dolthub/dolt/go/store/types" 32 ) 33 34 type ( 35 diffFunc func(ctx context.Context, changeChan chan<- types.ValueChanged) error 36 pathPartFunc func(v types.Value) (types.PathPart, error) 37 valueFunc func(k types.Value) (types.Value, error) 38 ) 39 40 // Difference represents a "diff" between two Noms graphs. 41 type Difference struct { 42 // Path to the Value that has changed 43 Path types.Path 44 // ChangeType indicates the type of diff: modified, added, deleted 45 ChangeType types.DiffChangeType 46 // OldValue is Value before the change, can be nil if Value was added 47 OldValue types.Value 48 // NewValue is Value after the change, can be nil if Value was removed 49 NewValue types.Value 50 // NewKeyValue is used for when elements are added to diffs with a 51 // non-primitive key. The new key must available when the map gets updated. 52 NewKeyValue types.Value 53 // KeyValue holds the key associated with a changed map value 54 KeyValue types.Value 55 } 56 57 func (dif Difference) IsEmpty() bool { 58 return dif.Path == nil && dif.OldValue == nil && dif.NewValue == nil 59 } 60 61 type ShouldDescFunc func(v1, v2 types.Value) bool 62 63 // differ is used internally to hold information necessary for diffing two graphs. 64 type differ struct { 65 // Channel used to send Difference objects back to caller 66 diffChan chan<- Difference 67 // Use LeftRight diff as opposed to TopDown 68 leftRight bool 69 70 shouldDescend ShouldDescFunc 71 72 eg *errgroup.Group 73 asyncPanic *atomic.Value 74 } 75 76 // Diff traverses two graphs simultaneously looking for differences. It returns 77 // two channels: a DiffReceiveChan that the caller can use to iterate over the 78 // diffs in the graph and a StopSendChanel that a caller can use to signal the 79 // Diff function to stop processing. 80 // Diff returns the Differences in depth-first first order. A 'diff' is defined 81 // as one of the following conditions: 82 // * a Value is Added or Removed from a node in the graph 83 // * the type of a Value has changed in the graph 84 // * a primitive (i.e. Bool, Float, String, Ref or Blob) Value has changed. 85 // 86 // A Difference is not returned when a non-primitive value has been modified. For 87 // example, a struct field has been changed from one Value of type Employee to 88 // another. Those modifications are accounted for by the Differences described 89 // above at a lower point in the graph. 90 // 91 // If leftRight is true then the left-right diff is used for ordered sequences 92 // - see Diff vs DiffLeftRight in Set and Map. 93 // 94 // Note: the function sends messages on diffChan and checks whether stopChan has 95 // been closed to know if it needs to terminate diffing early. To function 96 // properly it needs to be executed concurrently with code that reads values from 97 // diffChan. The following is a typical invocation of Diff(): 98 // dChan := make(chan Difference) 99 // sChan := make(chan struct{}) 100 // go func() { 101 // d.Diff(s3, s4, dChan, sChan, leftRight) 102 // close(dChan) 103 // }() 104 // for dif := range dChan { 105 // <some code> 106 // } 107 func Diff(ctx context.Context, v1, v2 types.Value, dChan chan<- Difference, leftRight bool, descFunc ShouldDescFunc) error { 108 f := func(ctx context.Context, d differ, v1, v2 types.Value) error { 109 return d.diff(ctx, nil, v1, v2) 110 } 111 112 return diff(ctx, f, v1, v2, dChan, leftRight, descFunc) 113 } 114 115 func DiffMapRange(ctx context.Context, m1, m2 types.Map, start types.Value, inRange types.ValueInRange, dChan chan<- Difference, leftRight bool, descFunc ShouldDescFunc) error { 116 f := func(ctx context.Context, d differ, v1, v2 types.Value) error { 117 return d.diffMapsInRange(ctx, nil, m1, m2, start, inRange) 118 } 119 120 return diff(ctx, f, m1, m2, dChan, leftRight, descFunc) 121 } 122 123 func diff(ctx context.Context, 124 f func(ctx context.Context, d differ, v1, v2 types.Value) error, 125 v1, v2 types.Value, 126 dChan chan<- Difference, 127 leftRight bool, 128 descFunc ShouldDescFunc) error { 129 if descFunc == nil { 130 descFunc = ShouldDescend 131 } 132 133 eg, ctx := errgroup.WithContext(ctx) 134 d := differ{ 135 diffChan: dChan, 136 leftRight: leftRight, 137 shouldDescend: descFunc, 138 139 eg: eg, 140 asyncPanic: new(atomic.Value), 141 } 142 if !v1.Equals(v2) { 143 if !d.shouldDescend(v1, v2) { 144 return d.sendDiff(ctx, Difference{Path: nil, ChangeType: types.DiffChangeModified, OldValue: v1, NewValue: v2}) 145 } else { 146 d.GoCatchPanic(func() error { 147 return f(ctx, d, v1, v2) 148 }) 149 return d.Wait() 150 } 151 } 152 return nil 153 } 154 155 func (d differ) diff(ctx context.Context, p types.Path, v1, v2 types.Value) error { 156 switch v1.Kind() { 157 case types.ListKind: 158 return d.diffLists(ctx, p, v1.(types.List), v2.(types.List)) 159 case types.MapKind: 160 return d.diffMaps(ctx, p, v1.(types.Map), v2.(types.Map)) 161 case types.SetKind: 162 return d.diffSets(ctx, p, v1.(types.Set), v2.(types.Set)) 163 case types.StructKind: 164 return d.diffStructs(ctx, p, v1.(types.Struct), v2.(types.Struct)) 165 default: 166 panic("Unrecognized type in diff function") 167 } 168 } 169 170 var AsyncPanicErr = errors.New("async panic") 171 172 func (d differ) GoCatchPanic(f func() error) { 173 d.eg.Go(func() (err error) { 174 defer func() { 175 if r := recover(); r != nil { 176 d.asyncPanic.Store(r) 177 err = AsyncPanicErr 178 } 179 }() 180 return f() 181 }) 182 } 183 184 func (d differ) Wait() error { 185 err := d.eg.Wait() 186 if p := d.asyncPanic.Load(); p != nil { 187 panic(p) 188 } 189 return err 190 } 191 192 func (d differ) diffLists(ctx context.Context, p types.Path, v1, v2 types.List) error { 193 spliceChan := make(chan types.Splice) 194 195 d.GoCatchPanic(func() error { 196 defer close(spliceChan) 197 return v2.Diff(ctx, v1, spliceChan) 198 }) 199 200 for splice := range spliceChan { 201 if ctx.Err() != nil { 202 return ctx.Err() 203 } 204 if splice.SpRemoved == splice.SpAdded { 205 // Heuristic: list only has modifications. 206 for i := uint64(0); i < splice.SpRemoved; i++ { 207 lastEl, err := v1.Get(ctx, splice.SpAt+i) 208 if err != nil { 209 return err 210 } 211 212 newEl, err := v2.Get(ctx, splice.SpFrom+i) 213 if err != nil { 214 return err 215 } 216 217 if d.shouldDescend(lastEl, newEl) { 218 idx := types.Float(splice.SpAt + i) 219 err := d.diff(ctx, append(p, types.NewIndexPath(idx)), lastEl, newEl) 220 if err != nil { 221 return err 222 } 223 } else { 224 p1 := p.Append(types.NewIndexPath(types.Float(splice.SpAt + i))) 225 oldVal, err := v1.Get(ctx, splice.SpAt+i) 226 if err != nil { 227 return err 228 } 229 230 newVal, err := v2.Get(ctx, splice.SpFrom+i) 231 if err != nil { 232 return err 233 } 234 235 dif := Difference{Path: p1, ChangeType: types.DiffChangeModified, OldValue: oldVal, NewValue: newVal} 236 err = d.sendDiff(ctx, dif) 237 if err != nil { 238 return err 239 } 240 } 241 } 242 continue 243 } 244 245 // Heuristic: list only has additions/removals. 246 for i := uint64(0); i < splice.SpRemoved; i++ { 247 p1 := p.Append(types.NewIndexPath(types.Float(splice.SpAt + i))) 248 oldVal, err := v1.Get(ctx, splice.SpAt+i) 249 if err != nil { 250 return err 251 } 252 253 dif := Difference{Path: p1, ChangeType: types.DiffChangeRemoved, OldValue: oldVal, NewValue: nil} 254 err = d.sendDiff(ctx, dif) 255 if err != nil { 256 return err 257 } 258 } 259 for i := uint64(0); i < splice.SpAdded; i++ { 260 p1 := p.Append(types.NewIndexPath(types.Float(splice.SpFrom + i))) 261 newVal, err := v2.Get(ctx, splice.SpFrom+i) 262 if err != nil { 263 return err 264 } 265 266 dif := Difference{Path: p1, ChangeType: types.DiffChangeAdded, OldValue: nil, NewValue: newVal} 267 err = d.sendDiff(ctx, dif) 268 if err != nil { 269 return err 270 } 271 } 272 } 273 274 return nil 275 } 276 277 func (d differ) diffMaps(ctx context.Context, p types.Path, v1, v2 types.Map) error { 278 trueFunc := func(value types.Value) (bool, error) { 279 return true, nil 280 } 281 282 return d.diffMapsInRange(ctx, p, v1, v2, nil, trueFunc) 283 } 284 285 func (d differ) diffMapsInRange(ctx context.Context, p types.Path, v1, v2 types.Map, start types.Value, inRange types.ValueInRange) error { 286 return d.diffOrdered(ctx, p, 287 func(v types.Value) (types.PathPart, error) { 288 if types.ValueCanBePathIndex(v) { 289 return types.NewIndexPath(v), nil 290 } else { 291 h, err := v.Hash(v1.Format()) 292 293 if err != nil { 294 return nil, err 295 } 296 297 return types.NewHashIndexPath(h), nil 298 } 299 }, 300 func(ctx context.Context, cc chan<- types.ValueChanged) error { 301 if d.leftRight { 302 return v2.DiffLeftRightInRange(ctx, v1, start, inRange, cc) 303 } else { 304 if start != nil { 305 panic("not implemented") 306 } 307 308 return v2.Diff(ctx, v1, cc) 309 } 310 }, 311 func(k types.Value) (types.Value, error) { 312 return k, nil 313 }, 314 func(k types.Value) (types.Value, error) { 315 v, _, err := v1.MaybeGet(ctx, k) 316 return v, err 317 }, 318 func(k types.Value) (types.Value, error) { 319 v, _, err := v2.MaybeGet(ctx, k) 320 return v, err 321 }, 322 ) 323 } 324 325 func (d differ) diffStructs(ctx context.Context, p types.Path, v1, v2 types.Struct) error { 326 str := func(v types.Value) string { 327 return string(v.(types.String)) 328 } 329 return d.diffOrdered(ctx, p, 330 func(v types.Value) (types.PathPart, error) { 331 return types.NewFieldPath(str(v)), nil 332 }, 333 func(ctx context.Context, cc chan<- types.ValueChanged) error { 334 return v2.Diff(ctx, v1, cc) 335 }, 336 func(k types.Value) (types.Value, error) { return k, nil }, 337 func(k types.Value) (types.Value, error) { 338 val, _, err := v1.MaybeGet(str(k)) 339 return val, err 340 }, 341 func(k types.Value) (types.Value, error) { 342 val, _, err := v2.MaybeGet(str(k)) 343 return val, err 344 }, 345 ) 346 } 347 348 func (d differ) diffSets(ctx context.Context, p types.Path, v1, v2 types.Set) error { 349 return d.diffOrdered(ctx, p, 350 func(v types.Value) (types.PathPart, error) { 351 if types.ValueCanBePathIndex(v) { 352 return types.NewIndexPath(v), nil 353 } 354 355 h, err := v.Hash(v1.Format()) 356 357 if err != nil { 358 return nil, err 359 } 360 361 return types.NewHashIndexPath(h), nil 362 }, 363 func(ctx context.Context, cc chan<- types.ValueChanged) error { 364 if d.leftRight { 365 return v2.DiffLeftRight(ctx, v1, cc) 366 } else { 367 return v2.Diff(ctx, v1, cc) 368 } 369 }, 370 func(k types.Value) (types.Value, error) { return k, nil }, 371 func(k types.Value) (types.Value, error) { return k, nil }, 372 func(k types.Value) (types.Value, error) { return k, nil }, 373 ) 374 } 375 376 func (d differ) diffOrdered(ctx context.Context, p types.Path, ppf pathPartFunc, df diffFunc, kf, v1, v2 valueFunc) error { 377 changeChan := make(chan types.ValueChanged) 378 379 d.GoCatchPanic(func() error { 380 defer close(changeChan) 381 return df(ctx, changeChan) 382 }) 383 384 for change := range changeChan { 385 if ctx.Err() != nil { 386 return ctx.Err() 387 } 388 389 k, err := kf(change.Key) 390 if err != nil { 391 return err 392 } 393 394 ppfRes, err := ppf(k) 395 if err != nil { 396 return err 397 } 398 399 p1 := p.Append(ppfRes) 400 401 switch change.ChangeType { 402 case types.DiffChangeAdded: 403 newVal, err := v2(change.Key) 404 if err != nil { 405 return err 406 } 407 408 dif := Difference{Path: p1, ChangeType: types.DiffChangeAdded, OldValue: nil, NewValue: newVal, NewKeyValue: k, KeyValue: change.Key} 409 err = d.sendDiff(ctx, dif) 410 if err != nil { 411 return err 412 } 413 case types.DiffChangeRemoved: 414 oldVal, err := v1(change.Key) 415 if err != nil { 416 return err 417 } 418 419 dif := Difference{Path: p1, ChangeType: types.DiffChangeRemoved, OldValue: oldVal, KeyValue: change.Key} 420 err = d.sendDiff(ctx, dif) 421 if err != nil { 422 return err 423 } 424 case types.DiffChangeModified: 425 c1, err := v1(change.Key) 426 if err != nil { 427 return err 428 } 429 430 c2, err := v2(change.Key) 431 if err != nil { 432 return err 433 } 434 435 if d.shouldDescend(c1, c2) { 436 err = d.diff(ctx, p1, c1, c2) 437 if err != nil { 438 return err 439 } 440 } else { 441 dif := Difference{Path: p1, ChangeType: types.DiffChangeModified, OldValue: c1, NewValue: c2, KeyValue: change.Key} 442 err = d.sendDiff(ctx, dif) 443 if err != nil { 444 return err 445 } 446 } 447 default: 448 panic("unknown change type") 449 } 450 } 451 452 return nil 453 } 454 455 // shouldDescend returns true, if Value is not primitive or is a Ref. 456 func ShouldDescend(v1, v2 types.Value) bool { 457 kind := v1.Kind() 458 return !types.IsPrimitiveKind(kind) && kind == v2.Kind() && kind != types.RefKind && kind != types.TupleKind 459 } 460 461 func (d differ) sendDiff(ctx context.Context, dif Difference) error { 462 select { 463 case <-ctx.Done(): 464 return ctx.Err() 465 case d.diffChan <- dif: 466 return nil 467 } 468 }