github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/prolly/tree/three_way_differ.go (about)

     1  // Copyright 2023 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package tree
    16  
    17  import (
    18  	"bytes"
    19  	"context"
    20  	"errors"
    21  	"fmt"
    22  	"io"
    23  
    24  	"github.com/dolthub/go-mysql-server/sql"
    25  
    26  	"github.com/dolthub/dolt/go/store/val"
    27  )
    28  
// ThreeWayDiffer is an iterator that gives an increased level of granularity
// of diffs between three root values. See DiffOp for the classes of diffs.
//
// Fields:
//   - lIter, rIter: the base->left and base->right diff iterators created by
//     NewThreeWayDiffer.
//   - resolveCb: callback Next invokes when both sides edited the same key
//     and the edits are not trivially convergent.
//   - lDiff, rDiff: the diff most recently read from lIter / rIter that has
//     not yet been emitted by Next.
//   - lDone, rDone: set once the corresponding iterator has returned io.EOF.
//   - keyless: copied from the NewThreeWayDiffer argument of the same name.
//   - leftAndRightSchemasDiffer: copied from
//     ThreeWayDiffInfo.LeftAndRightSchemasDiffer.
//
// NOTE(review): keyless and leftAndRightSchemasDiffer are stored but not read
// by any method in this file; confirm whether other code in the package
// consumes them or whether Next is expected to.
type ThreeWayDiffer[K ~[]byte, O Ordering[K]] struct {
	lIter, rIter              Differ[K, O]
	resolveCb                 resolveCb
	lDiff                     Diff
	rDiff                     Diff
	lDone                     bool
	rDone                     bool
	keyless                   bool
	leftAndRightSchemasDiffer bool
}
    41  
    42  //var _ DiffIter = (*threeWayDiffer[Item, val.TupleDesc])(nil)
    43  
// resolveCb is the callback ThreeWayDiffer.Next uses to try to reconcile two
// different edits to the same key. Next passes, in order, the left value, the
// right value, and the base value (the left diff's From); any of them may be
// nil. The returned bool reports whether the edits could be reconciled: when
// it is false Next emits a conflict, and when it is true Next emits a resolved
// diff (using the returned tuple as the merged value for divergent modifies;
// the tuple is ignored for divergent deletes). A non-nil error aborts Next.
type resolveCb func(*sql.Context, val.Tuple, val.Tuple, val.Tuple) (val.Tuple, bool, error)
    45  
// ThreeWayDiffInfo stores contextual data that can influence the diff.
//
//   - If |LeftSchemaChange| is true, then the left side's bytes have a
//     different interpretation from the base, so every row in both Left and
//     Base should be considered a modification, even if they have the same
//     bytes.
//   - If |RightSchemaChange| is true, then the right side's bytes have a
//     different interpretation from the base, so every row in both Right and
//     Base should be considered a modification, even if they have the same
//     bytes.
//   - If |LeftAndRightSchemasDiffer| is true, then the left and right sides of
//     the diff have a different interpretation of their bytes, so there cannot
//     be any convergent edits, even if two rows in Left and Right have the
//     same bytes.
//
// Note that these values aren't set for schema changes that have no effect on
// the meaning of the bytes, such as collation.
type ThreeWayDiffInfo struct {
	LeftSchemaChange          bool
	RightSchemaChange         bool
	LeftAndRightSchemasDiffer bool
}
    60  
    61  func NewThreeWayDiffer[K, V ~[]byte, O Ordering[K]](
    62  	ctx context.Context,
    63  	ns NodeStore,
    64  	left StaticMap[K, V, O],
    65  	right StaticMap[K, V, O],
    66  	base StaticMap[K, V, O],
    67  	resolveCb resolveCb,
    68  	keyless bool,
    69  	diffInfo ThreeWayDiffInfo,
    70  	order O,
    71  ) (*ThreeWayDiffer[K, O], error) {
    72  	// probably compute each of these separately
    73  	ld, err := DifferFromRoots[K](ctx, ns, ns, base.Root, left.Root, order, diffInfo.LeftSchemaChange)
    74  	if err != nil {
    75  		return nil, err
    76  	}
    77  
    78  	rd, err := DifferFromRoots[K](ctx, ns, ns, base.Root, right.Root, order, diffInfo.RightSchemaChange)
    79  	if err != nil {
    80  		return nil, err
    81  	}
    82  
    83  	return &ThreeWayDiffer[K, O]{
    84  		lIter:                     ld,
    85  		rIter:                     rd,
    86  		resolveCb:                 resolveCb,
    87  		keyless:                   keyless,
    88  		leftAndRightSchemasDiffer: diffInfo.LeftAndRightSchemasDiffer,
    89  	}, nil
    90  }
    91  
// threeWayDiffState enumerates the steps of the state machine that
// ThreeWayDiffer.Next runs on every call:
//
//   - dsUnknown: the zero value. Next has no case for it, so reaching it
//     triggers Next's "unknown threeWayDiffState" panic.
//   - dsInit: entry state; reads the first diff from each iterator if none is
//     buffered yet.
//   - dsDiffFinalize: returns io.EOF when both iterators are done, or
//     short-circuits to one side when the other is exhausted.
//   - dsCompare: compares the keys of the two buffered diffs.
//   - dsNewLeft: emits an edit whose key was changed only on the left.
//   - dsNewRight: emits an edit whose key was changed only on the right.
//   - dsMatch: both sides edited the same key; classifies the pair as
//     convergent, resolved, or conflicting.
//   - dsMatchFinalize: advances both iterators after a match and returns.
type threeWayDiffState uint8

const (
	dsUnknown threeWayDiffState = iota
	dsInit
	dsDiffFinalize
	dsCompare
	dsNewLeft
	dsNewRight
	dsMatch
	dsMatchFinalize
)
   104  
   105  func (d *ThreeWayDiffer[K, O]) Next(ctx *sql.Context) (ThreeWayDiff, error) {
   106  	var err error
   107  	var res ThreeWayDiff
   108  	nextState := dsInit
   109  	for {
   110  		// The regular flow will be:
   111  		// - dsInit: get the first diff in each iterator if this is the first Next
   112  		// - dsDiffFinalize: short-circuit comparing if one iterator is exhausted
   113  		// - dsCompare: compare keys for the leading diffs, to determine whether
   114  		//   the diffs are independent, or require further disambiguation.
   115  		// - dsNewLeft: an edit was made to the left root value for a key not edited
   116  		//   on the right.
   117  		// - dsNewRight: ditto above, edit to key only on right.
   118  		// - dsMatch: edits made to the same key in left and right roots, either
   119  		//   resolve non-overlapping field changes or indicate schema/value conflict.
   120  		// - dsMatchFinalize: increment both iters after performing match disambiguation.
   121  		switch nextState {
   122  		case dsInit:
   123  			if !d.lDone {
   124  				if d.lDiff.Key == nil {
   125  					d.lDiff, err = d.lIter.Next(ctx)
   126  					if errors.Is(err, io.EOF) {
   127  						d.lDone = true
   128  					} else if err != nil {
   129  						return ThreeWayDiff{}, err
   130  					}
   131  				}
   132  			}
   133  			if !d.rDone {
   134  				if d.rDiff.Key == nil {
   135  					d.rDiff, err = d.rIter.Next(ctx)
   136  					if errors.Is(err, io.EOF) {
   137  						d.rDone = true
   138  					} else if err != nil {
   139  						return ThreeWayDiff{}, err
   140  					}
   141  				}
   142  			}
   143  			nextState = dsDiffFinalize
   144  		case dsDiffFinalize:
   145  			if d.lDone && d.rDone {
   146  				return ThreeWayDiff{}, io.EOF
   147  			} else if d.lDone {
   148  				nextState = dsNewRight
   149  			} else if d.rDone {
   150  				nextState = dsNewLeft
   151  			} else {
   152  				nextState = dsCompare
   153  			}
   154  		case dsCompare:
   155  			cmp := d.lIter.order.Compare(K(d.lDiff.Key), K(d.rDiff.Key))
   156  			switch {
   157  			case cmp < 0:
   158  				nextState = dsNewLeft
   159  			case cmp == 0:
   160  				nextState = dsMatch
   161  			case cmp > 0:
   162  				nextState = dsNewRight
   163  			default:
   164  			}
   165  		case dsNewLeft:
   166  			res = d.newLeftEdit(d.lDiff.Key, d.lDiff.To, d.lDiff.Type)
   167  			d.lDiff, err = d.lIter.Next(ctx)
   168  			if errors.Is(err, io.EOF) {
   169  				d.lDone = true
   170  			} else if err != nil {
   171  				return ThreeWayDiff{}, err
   172  			}
   173  			return res, nil
   174  		case dsNewRight:
   175  			res = d.newRightEdit(d.rDiff.Key, d.rDiff.From, d.rDiff.To, d.rDiff.Type)
   176  			d.rDiff, err = d.rIter.Next(ctx)
   177  			if errors.Is(err, io.EOF) {
   178  				d.rDone = true
   179  			} else if err != nil {
   180  				return ThreeWayDiff{}, err
   181  			}
   182  			return res, nil
   183  		case dsMatch:
   184  			if d.lDiff.To == nil && d.rDiff.To == nil {
   185  				res = d.newConvergentEdit(d.lDiff.Key, d.lDiff.To, d.lDiff.Type)
   186  			} else if d.lDiff.To == nil || d.rDiff.To == nil {
   187  				// Divergent delete. Attempt to resolve.
   188  				_, ok, err := d.resolveCb(ctx, val.Tuple(d.lDiff.To), val.Tuple(d.rDiff.To), val.Tuple(d.lDiff.From))
   189  				if err != nil {
   190  					return ThreeWayDiff{}, err
   191  				}
   192  				if !ok {
   193  					res = d.newDivergentDeleteConflict(d.lDiff.Key, d.lDiff.From, d.lDiff.To, d.rDiff.To)
   194  				} else {
   195  					res = d.newDivergentDeleteResolved(d.lDiff.Key, d.lDiff.From, d.lDiff.To, d.rDiff.To)
   196  				}
   197  			} else if d.lDiff.Type == d.rDiff.Type && bytes.Equal(d.lDiff.To, d.rDiff.To) {
   198  				res = d.newConvergentEdit(d.lDiff.Key, d.lDiff.To, d.lDiff.Type)
   199  			} else {
   200  				resolved, ok, err := d.resolveCb(ctx, val.Tuple(d.lDiff.To), val.Tuple(d.rDiff.To), val.Tuple(d.lDiff.From))
   201  				if err != nil {
   202  					return ThreeWayDiff{}, err
   203  				}
   204  				if !ok {
   205  					res = d.newDivergentClashConflict(d.lDiff.Key, d.lDiff.From, d.lDiff.To, d.rDiff.To)
   206  				} else {
   207  					res = d.newDivergentResolved(d.lDiff.Key, d.lDiff.To, d.rDiff.To, Item(resolved))
   208  				}
   209  			}
   210  			nextState = dsMatchFinalize
   211  		case dsMatchFinalize:
   212  			d.lDiff, err = d.lIter.Next(ctx)
   213  			if errors.Is(err, io.EOF) {
   214  				d.lDone = true
   215  			} else if err != nil {
   216  				return ThreeWayDiff{}, err
   217  			}
   218  
   219  			d.rDiff, err = d.rIter.Next(ctx)
   220  			if errors.Is(err, io.EOF) {
   221  				d.rDone = true
   222  			} else if err != nil {
   223  				return ThreeWayDiff{}, err
   224  			}
   225  
   226  			return res, nil
   227  		default:
   228  			panic(fmt.Sprintf("unknown threeWayDiffState: %d", nextState))
   229  		}
   230  	}
   231  }
   232  
// Close is currently a no-op and always returns nil.
func (d *ThreeWayDiffer[K, O]) Close() error {
	return nil
}
   236  
   237  //go:generate stringer -type=diffOp -linecomment
   238  
   239  type DiffOp uint16
   240  
   241  const (
   242  	DiffOpLeftAdd                 DiffOp = iota // leftAdd
   243  	DiffOpRightAdd                              // rightAdd
   244  	DiffOpLeftDelete                            //leftDelete
   245  	DiffOpRightDelete                           //rightDelete
   246  	DiffOpLeftModify                            //leftModify
   247  	DiffOpRightModify                           //rightModify
   248  	DiffOpConvergentAdd                         //convergentAdd
   249  	DiffOpConvergentDelete                      //convergentDelete
   250  	DiffOpConvergentModify                      //convergentModify
   251  	DiffOpDivergentModifyResolved               //divergentModifyResolved
   252  	DiffOpDivergentDeleteConflict               //divergentDeleteConflict
   253  	DiffOpDivergentModifyConflict               //divergentModifyConflict
   254  	DiffOpDivergentDeleteResolved               //divergentDeleteConflict
   255  )
   256  
// ThreeWayDiff is a generic object for encoding a three way diff.
type ThreeWayDiff struct {
	// Op indicates the type of diff.
	Op DiffOp
	// Key is set on every diff produced by ThreeWayDiffer. Only a subset of
	// Base, Left, Right and Merged is populated, depending on the DiffOp;
	// fields that are not populated are left nil.
	Key, Base, Left, Right, Merged val.Tuple
}
   265  
   266  func (d *ThreeWayDiffer[K, O]) newLeftEdit(key, left Item, typ DiffType) ThreeWayDiff {
   267  	var op DiffOp
   268  	switch typ {
   269  	case AddedDiff:
   270  		op = DiffOpLeftAdd
   271  	case ModifiedDiff:
   272  		op = DiffOpLeftModify
   273  	case RemovedDiff:
   274  		op = DiffOpLeftDelete
   275  	default:
   276  		panic("unknown diff type")
   277  	}
   278  	return ThreeWayDiff{
   279  		Op:   op,
   280  		Key:  val.Tuple(key),
   281  		Left: val.Tuple(left),
   282  	}
   283  }
   284  
   285  func (d *ThreeWayDiffer[K, O]) newRightEdit(key, base, right Item, typ DiffType) ThreeWayDiff {
   286  	var op DiffOp
   287  	switch typ {
   288  	case AddedDiff:
   289  		op = DiffOpRightAdd
   290  	case ModifiedDiff:
   291  		op = DiffOpRightModify
   292  	case RemovedDiff:
   293  		op = DiffOpRightDelete
   294  	default:
   295  		panic("unknown diff type")
   296  	}
   297  	return ThreeWayDiff{
   298  		Op:    op,
   299  		Key:   val.Tuple(key),
   300  		Base:  val.Tuple(base),
   301  		Right: val.Tuple(right),
   302  	}
   303  }
   304  
   305  func (d *ThreeWayDiffer[K, O]) newConvergentEdit(key, left Item, typ DiffType) ThreeWayDiff {
   306  	var op DiffOp
   307  	switch typ {
   308  	case AddedDiff:
   309  		op = DiffOpConvergentAdd
   310  	case ModifiedDiff:
   311  		op = DiffOpConvergentModify
   312  	case RemovedDiff:
   313  		op = DiffOpConvergentDelete
   314  	default:
   315  		panic("unknown diff type")
   316  	}
   317  	return ThreeWayDiff{
   318  		Op:   op,
   319  		Key:  val.Tuple(key),
   320  		Left: val.Tuple(left),
   321  	}
   322  }
   323  
   324  func (d *ThreeWayDiffer[K, O]) newDivergentResolved(key, left, right, merged Item) ThreeWayDiff {
   325  	return ThreeWayDiff{
   326  		Op:     DiffOpDivergentModifyResolved,
   327  		Key:    val.Tuple(key),
   328  		Left:   val.Tuple(left),
   329  		Right:  val.Tuple(right),
   330  		Merged: val.Tuple(merged),
   331  	}
   332  }
   333  
   334  func (d *ThreeWayDiffer[K, O]) newDivergentDeleteConflict(key, base, left, right Item) ThreeWayDiff {
   335  	return ThreeWayDiff{
   336  		Op:    DiffOpDivergentDeleteConflict,
   337  		Key:   val.Tuple(key),
   338  		Base:  val.Tuple(base),
   339  		Left:  val.Tuple(left),
   340  		Right: val.Tuple(right),
   341  	}
   342  }
   343  
   344  func (d *ThreeWayDiffer[K, O]) newDivergentDeleteResolved(key, base, left, right Item) ThreeWayDiff {
   345  	return ThreeWayDiff{
   346  		Op:    DiffOpDivergentDeleteResolved,
   347  		Key:   val.Tuple(key),
   348  		Base:  val.Tuple(base),
   349  		Left:  val.Tuple(left),
   350  		Right: val.Tuple(right),
   351  	}
   352  }
   353  
   354  func (d *ThreeWayDiffer[K, O]) newDivergentClashConflict(key, base, left, right Item) ThreeWayDiff {
   355  	return ThreeWayDiff{
   356  		Op:    DiffOpDivergentModifyConflict,
   357  		Key:   val.Tuple(key),
   358  		Base:  val.Tuple(base),
   359  		Left:  val.Tuple(left),
   360  		Right: val.Tuple(right),
   361  	}
   362  }