github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/diff/apply_patch.go (about)

     1  // Copyright 2019 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  //
    15  // This file incorporates work covered by the following copyright and
    16  // permission notice:
    17  //
    18  // Copyright 2016 Attic Labs, Inc. All rights reserved.
    19  // Licensed under the Apache License, version 2.0:
    20  // http://www.apache.org/licenses/LICENSE-2.0
    21  
    22  package diff
    23  
    24  import (
    25  	"context"
    26  	"fmt"
    27  
    28  	"github.com/dolthub/dolt/go/store/d"
    29  	"github.com/dolthub/dolt/go/store/types"
    30  )
    31  
    32  // Apply applies a Patch (list of diffs) to a graph. It fulfills the
    33  // following contract:
    34  //
    35  //	Given 2 Noms graphs: a1 and a2:
    36  //	  ApplyPatch(a1, Diff(a1, a2)) == a2
    37  //
    38  // This is useful for IncrementalUpdate() and possibly other problems. See
    39  // updater.go for more information.
    40  //
    41  // This function uses a patchStack to maintain state of the graph as it cycles
    42  // through the diffs in a patch, applying them to 'root' one by one. Because the
    43  // Difference objects in the patch can be sorted according to their path, each
    44  // one is applied in order. When done in combination with the stack, this enables
    45  // all Differences that change a particular node to be applied to that node
    46  // before it gets assigned back to it's parent.
    47  func Apply(ctx context.Context, vr types.ValueReader, root types.Value, patch Patch) (types.Value, error) {
    48  	if len(patch) == 0 {
    49  		return root, nil
    50  	}
    51  
    52  	var lastPath types.Path
    53  	stack := patchStack{}
    54  	types.SortWithErroringLess(ctx, vr.Format(), PatchSort{patch})
    55  
    56  	// Push the element on the stack that corresponds to the root
    57  	// node.
    58  	stack.push(nil, nil, types.DiffChangeModified, root, nil, nil)
    59  
    60  	for _, dif := range patch {
    61  		// get the path where this dif needs to be applied
    62  		p := dif.Path
    63  
    64  		// idx will hold the index of the last common element between p and
    65  		// lastPath (p from the last iteration).
    66  		var idx int
    67  
    68  		// p can be identical to lastPath in certain cases. For example, when
    69  		// one item gets removed from a list at the same place another item
    70  		// is added to it. In this case, we need pop the last operation of the
    71  		// stack early and set the idx to be the len(p) - 1.
    72  		// Otherwise, if the paths are different we can call commonPrefixCount()
    73  		if len(p) > 0 && p.Equals(lastPath) {
    74  			_, err := stack.pop(ctx)
    75  
    76  			if err != nil {
    77  				return nil, err
    78  			}
    79  
    80  			idx = len(p) - 1
    81  		} else {
    82  			idx = commonPrefixCount(lastPath, p)
    83  		}
    84  		lastPath = p
    85  
    86  		// if the stack has elements on it leftover from the last iteration. Pop
    87  		// those elements until the stack only has values in it that are
    88  		// referenced by this p. Popping an element on the stack, folds that
    89  		// value into it's parent.
    90  		for idx < stack.Len()-1 {
    91  			_, err := stack.pop(ctx)
    92  
    93  			if err != nil {
    94  				return nil, err
    95  			}
    96  		}
    97  
    98  		// tail is the part of the current path that has not yet been pushed
    99  		// onto the stack. Iterate over those pathParts and push those values
   100  		// onto the stack.
   101  		tail := p[idx:]
   102  		for i, pp := range tail {
   103  			top := stack.top()
   104  			parent := top.newestValue()
   105  			oldValue, err := pp.Resolve(ctx, parent, nil)
   106  
   107  			if err != nil {
   108  				return nil, err
   109  			}
   110  
   111  			var newValue types.Value
   112  			if i == len(tail)-1 { // last pathPart in this path
   113  				newValue = oldValue
   114  				oldValue = dif.OldValue
   115  			}
   116  			// Any intermediate elements on the stack will have a changeType
   117  			// of modified.  Leaf elements will be updated below to reflect the
   118  			// actual changeType.
   119  			stack.push(p, pp, types.DiffChangeModified, oldValue, newValue, dif.NewKeyValue)
   120  		}
   121  
   122  		// Update the top element in the stack with changeType from the dif and
   123  		// the NewValue from the diff
   124  		se := stack.top()
   125  		se.newValue = dif.NewValue
   126  		se.changeType = dif.ChangeType
   127  	}
   128  
   129  	// We're done applying diffs to the graph. Pop any elements left on the
   130  	// stack and return the new root.
   131  	var newRoot stackElem
   132  	for stack.Len() > 0 {
   133  		var err error
   134  		newRoot, err = stack.pop(ctx)
   135  
   136  		if err != nil {
   137  			return nil, err
   138  		}
   139  	}
   140  	return newRoot.newValue, nil
   141  }
   142  
   143  // updateNode handles the actual update of a node. It uses 'pp' to get the
   144  // information that it needs to update 'parent' with 'newVal'. 'oldVal' is also
   145  // passed in so that Sets can be updated correctly. This function is used by
   146  // the patchStack Pop() function to merge values into a new graph.
   147  func (stack *patchStack) updateNode(ctx context.Context, top *stackElem, parent types.Value) (types.Value, error) {
   148  	d.PanicIfTrue(parent == nil)
   149  	switch part := top.pathPart.(type) {
   150  	case types.FieldPath:
   151  		switch top.changeType {
   152  		case types.DiffChangeAdded:
   153  			return parent.(types.Struct).Set(part.Name, top.newValue)
   154  		case types.DiffChangeRemoved:
   155  			return parent.(types.Struct).Delete(part.Name)
   156  		case types.DiffChangeModified:
   157  			return parent.(types.Struct).Set(part.Name, top.newValue)
   158  		}
   159  	case types.IndexPath:
   160  		switch el := parent.(type) {
   161  		case types.List:
   162  			idx := uint64(part.Index.(types.Float))
   163  			offset := stack.adjustIndexOffset(top.path, top.changeType)
   164  			realIdx := idx + uint64(offset)
   165  			var nv types.Value
   166  			switch top.changeType {
   167  			case types.DiffChangeAdded:
   168  				if realIdx > el.Len() {
   169  					return el.Edit().Append(top.newValue).List(ctx)
   170  				} else {
   171  					return el.Edit().Insert(realIdx, top.newValue).List(ctx)
   172  				}
   173  
   174  			case types.DiffChangeRemoved:
   175  				return el.Edit().RemoveAt(realIdx).List(ctx)
   176  			case types.DiffChangeModified:
   177  				return el.Edit().Set(realIdx, top.newValue).List(ctx)
   178  			}
   179  			return nv, nil
   180  		case types.Map:
   181  			switch top.changeType {
   182  			case types.DiffChangeAdded:
   183  				return el.Edit().Set(part.Index, top.newValue).Map(ctx)
   184  			case types.DiffChangeRemoved:
   185  				return el.Edit().Remove(part.Index).Map(ctx)
   186  			case types.DiffChangeModified:
   187  				if part.IntoKey {
   188  					newPart := types.IndexPath{Index: part.Index}
   189  					ov, err := newPart.Resolve(ctx, parent, nil)
   190  
   191  					if err != nil {
   192  						return nil, err
   193  					}
   194  
   195  					return el.Edit().Remove(part.Index).Set(top.newValue, ov).Map(ctx)
   196  				}
   197  				return el.Edit().Set(part.Index, top.newValue).Map(ctx)
   198  			}
   199  		case types.Set:
   200  			if top.oldValue != nil {
   201  				se, err := el.Edit().Remove(ctx, top.oldValue)
   202  
   203  				if err != nil {
   204  					return nil, err
   205  				}
   206  
   207  				el, err = se.Set(ctx)
   208  
   209  				if err != nil {
   210  					return nil, err
   211  				}
   212  			}
   213  
   214  			if top.newValue != nil {
   215  				se, err := el.Edit().Insert(ctx, top.newValue)
   216  
   217  				if err != nil {
   218  					return nil, err
   219  				}
   220  
   221  				el, err = se.Set(ctx)
   222  
   223  				if err != nil {
   224  					return nil, err
   225  				}
   226  			}
   227  
   228  			return el, nil
   229  		}
   230  	case types.HashIndexPath:
   231  		switch el := parent.(type) {
   232  		case types.Set:
   233  			switch top.changeType {
   234  			case types.DiffChangeAdded:
   235  				se, err := el.Edit().Insert(ctx, top.newValue)
   236  
   237  				if err != nil {
   238  					return nil, err
   239  				}
   240  
   241  				return se.Set(ctx)
   242  			case types.DiffChangeRemoved:
   243  				se, err := el.Edit().Remove(ctx, top.oldValue)
   244  
   245  				if err != nil {
   246  					return nil, err
   247  				}
   248  
   249  				return se.Set(ctx)
   250  			case types.DiffChangeModified:
   251  				se, err := el.Edit().Remove(ctx, top.oldValue)
   252  
   253  				if err != nil {
   254  					return nil, err
   255  				}
   256  
   257  				se, err = se.Insert(ctx, top.newValue)
   258  
   259  				if err != nil {
   260  					return nil, err
   261  				}
   262  
   263  				return se.Set(ctx)
   264  			}
   265  		case types.Map:
   266  			keyPart := types.HashIndexPath{Hash: part.Hash, IntoKey: true}
   267  			k, err := keyPart.Resolve(ctx, parent, nil)
   268  
   269  			if err != nil {
   270  				return nil, err
   271  			}
   272  
   273  			switch top.changeType {
   274  			case types.DiffChangeAdded:
   275  				k := top.newKeyValue
   276  				return el.Edit().Set(k, top.newValue).Map(ctx)
   277  			case types.DiffChangeRemoved:
   278  				return el.Edit().Remove(k).Map(ctx)
   279  			case types.DiffChangeModified:
   280  				if part.IntoKey {
   281  					v, found, err := el.MaybeGet(ctx, k)
   282  
   283  					if err != nil {
   284  						return nil, err
   285  					}
   286  
   287  					d.PanicIfFalse(found)
   288  					return el.Edit().Remove(k).Set(top.newValue, v).Map(ctx)
   289  				}
   290  				return el.Edit().Set(k, top.newValue).Map(ctx)
   291  			}
   292  		}
   293  	}
   294  	panic(fmt.Sprintf("unreachable, pp.(type): %T", top.pathPart))
   295  }
   296  
   297  // Returns the count of the number of PathParts that two paths have in a common
   298  // prefix. The paths '.field1' and '.field2' have a 0 length common prefix.
   299  // Todo: move to types.Path?
   300  func commonPrefixCount(p1, p2 types.Path) int {
   301  	cnt := 0
   302  
   303  	for i, pp1 := range p1 {
   304  		var pp2 types.PathPart
   305  		if i < len(p2) {
   306  			pp2 = p2[i]
   307  		}
   308  		if pp1 != pp2 {
   309  			return cnt
   310  		}
   311  		cnt += 1
   312  	}
   313  	return cnt
   314  }
   315  
   316  type stackElem struct {
   317  	path        types.Path
   318  	pathPart    types.PathPart // from parent Value to this Value
   319  	changeType  types.DiffChangeType
   320  	oldValue    types.Value // can be nil if newValue is not nil
   321  	newValue    types.Value // can be nil if oldValue is not nil
   322  	newKeyValue types.Value
   323  }
   324  
   325  // newestValue returns newValue if not nil, otherwise oldValue. This is useful
   326  // when merging. Elements on the stack were 'push'ed there with the oldValue.
   327  // newValue may have been set when a value was 'pop'ed above it. This method
   328  // returns the last value that has been set.
   329  func (se stackElem) newestValue() types.Value {
   330  	if se.newValue != nil {
   331  		return se.newValue
   332  	}
   333  	return se.oldValue
   334  }
   335  
   336  type patchStack struct {
   337  	vals     []stackElem
   338  	lastPath types.Path
   339  	addCnt   int
   340  	rmCnt    int
   341  }
   342  
   343  func (stack *patchStack) push(p types.Path, pp types.PathPart, changeType types.DiffChangeType, oldValue, newValue, newKeyValue types.Value) {
   344  	stack.vals = append(stack.vals, stackElem{path: p, pathPart: pp, changeType: changeType, oldValue: oldValue, newValue: newValue, newKeyValue: newKeyValue})
   345  }
   346  
   347  func (stack *patchStack) top() *stackElem {
   348  	return &stack.vals[len(stack.vals)-1]
   349  }
   350  
   351  // pop applies the change to the graph. When an element is 'pop'ed from the stack,
   352  // this function uses the pathPart to merge that value into it's parent.
   353  func (stack *patchStack) pop(ctx context.Context) (stackElem, error) {
   354  	top := stack.top()
   355  	stack.vals = stack.vals[:len(stack.vals)-1]
   356  	if stack.Len() > 0 {
   357  		newTop := stack.top()
   358  		parent := newTop.newestValue()
   359  
   360  		var err error
   361  		newTop.newValue, err = stack.updateNode(ctx, top, parent)
   362  
   363  		if err != nil {
   364  			return stackElem{}, err
   365  		}
   366  	}
   367  	return *top, nil
   368  }
   369  
   370  func (stack *patchStack) Len() int {
   371  	return len(stack.vals)
   372  }
   373  
   374  // adjustIndexOffset returns an offset that needs to be added to list indexes
   375  // when applying diffs to lists. Diffs are applied to lists beginning at the 0th
   376  // element. Changes to the list mean that subsequent changes to the same list
   377  // have to be adjusted accordingly. The stack keeps state for each list as it's
   378  // processed so updateNode() can get the correct index.
   379  // Whenever a list is encountered, diffs consist of add & remove operations. The
   380  // offset is calculated by keeping a count of each add & remove. Due to the way
   381  // way diffs are calculated, no offset is ever needed for 'add' operations. The
   382  // offset for 'remove' operations are calculated as:
   383  //
   384  //	stack.addCnt - stack.rmCnt
   385  func (stack *patchStack) adjustIndexOffset(p types.Path, changeType types.DiffChangeType) (res int) {
   386  	parentPath := p[:len(p)-1]
   387  
   388  	// parentPath is different than the last parentPath so reset counters
   389  	if stack.lastPath == nil || !stack.lastPath.Equals(parentPath) {
   390  		stack.lastPath = parentPath
   391  		stack.addCnt = 0
   392  		stack.rmCnt = 0
   393  	}
   394  
   395  	// offset for 'Add' operations are always 0, 'Remove' ops offset are
   396  	// calculated here
   397  	if changeType == types.DiffChangeRemoved {
   398  		res = stack.addCnt - stack.rmCnt
   399  	}
   400  
   401  	// Bump up the appropriate cnt for this operation.
   402  	switch changeType {
   403  	case types.DiffChangeAdded:
   404  		stack.addCnt += 1
   405  	case types.DiffChangeRemoved:
   406  		stack.rmCnt += 1
   407  	}
   408  	return
   409  }