golang.org/x/tools@v0.21.0/internal/diff/myers/diff.go (about)

     1  // Copyright 2019 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package myers implements the Myers diff algorithm.
     6  package myers
     7  
     8  import (
     9  	"strings"
    10  
    11  	"golang.org/x/tools/internal/diff"
    12  )
    13  
    14  // Sources:
    15  // https://blog.jcoglan.com/2017/02/17/the-myers-diff-algorithm-part-3/
    16  // https://www.codeproject.com/Articles/42279/%2FArticles%2F42279%2FInvestigating-Myers-diff-algorithm-Part-1-of-2
    17  
    18  // ComputeEdits returns the diffs of two strings using a simple
    19  // line-based implementation, like [diff.Strings].
    20  //
    21  // Deprecated: this implementation is moribund. However, when diffs
    22  // appear in marker test expectations, they are the particular diffs
    23  // produced by this implementation. The marker test framework
    24  // asserts diff(orig, got)==wantDiff, but ideally it would compute
    25  // got==apply(orig, wantDiff) so that the notation of the diff
    26  // is immaterial.
    27  func ComputeEdits(before, after string) []diff.Edit {
    28  	beforeLines := splitLines(before)
    29  	ops := operations(beforeLines, splitLines(after))
    30  
    31  	// Build a table mapping line number to offset.
    32  	lineOffsets := make([]int, 0, len(beforeLines)+1)
    33  	total := 0
    34  	for i := range beforeLines {
    35  		lineOffsets = append(lineOffsets, total)
    36  		total += len(beforeLines[i])
    37  	}
    38  	lineOffsets = append(lineOffsets, total) // EOF
    39  
    40  	edits := make([]diff.Edit, 0, len(ops))
    41  	for _, op := range ops {
    42  		start, end := lineOffsets[op.I1], lineOffsets[op.I2]
    43  		switch op.Kind {
    44  		case opDelete:
    45  			// Delete: before[I1:I2] is deleted.
    46  			edits = append(edits, diff.Edit{Start: start, End: end})
    47  		case opInsert:
    48  			// Insert: after[J1:J2] is inserted at before[I1:I1].
    49  			if content := strings.Join(op.Content, ""); content != "" {
    50  				edits = append(edits, diff.Edit{Start: start, End: end, New: content})
    51  			}
    52  		}
    53  	}
    54  	return edits
    55  }
    56  
    57  // opKind is used to denote the type of operation a line represents.
    58  type opKind int
    59  
    60  const (
    61  	opDelete opKind = iota // line deleted from input (-)
    62  	opInsert               // line inserted into output (+)
    63  	opEqual                // line present in input and output
    64  )
    65  
    66  func (kind opKind) String() string {
    67  	switch kind {
    68  	case opDelete:
    69  		return "delete"
    70  	case opInsert:
    71  		return "insert"
    72  	case opEqual:
    73  		return "equal"
    74  	default:
    75  		panic("unknown opKind")
    76  	}
    77  }
    78  
    79  type operation struct {
    80  	Kind    opKind
    81  	Content []string // content from b
    82  	I1, I2  int      // indices of the line in a
    83  	J1      int      // indices of the line in b, J2 implied by len(Content)
    84  }
    85  
    86  // operations returns the list of operations to convert a into b, consolidating
    87  // operations for multiple lines and not including equal lines.
    88  func operations(a, b []string) []*operation {
    89  	if len(a) == 0 && len(b) == 0 {
    90  		return nil
    91  	}
    92  
    93  	trace, offset := shortestEditSequence(a, b)
    94  	snakes := backtrack(trace, len(a), len(b), offset)
    95  
    96  	M, N := len(a), len(b)
    97  
    98  	var i int
    99  	solution := make([]*operation, len(a)+len(b))
   100  
   101  	add := func(op *operation, i2, j2 int) {
   102  		if op == nil {
   103  			return
   104  		}
   105  		op.I2 = i2
   106  		if op.Kind == opInsert {
   107  			op.Content = b[op.J1:j2]
   108  		}
   109  		solution[i] = op
   110  		i++
   111  	}
   112  	x, y := 0, 0
   113  	for _, snake := range snakes {
   114  		if len(snake) < 2 {
   115  			continue
   116  		}
   117  		var op *operation
   118  		// delete (horizontal)
   119  		for snake[0]-snake[1] > x-y {
   120  			if op == nil {
   121  				op = &operation{
   122  					Kind: opDelete,
   123  					I1:   x,
   124  					J1:   y,
   125  				}
   126  			}
   127  			x++
   128  			if x == M {
   129  				break
   130  			}
   131  		}
   132  		add(op, x, y)
   133  		op = nil
   134  		// insert (vertical)
   135  		for snake[0]-snake[1] < x-y {
   136  			if op == nil {
   137  				op = &operation{
   138  					Kind: opInsert,
   139  					I1:   x,
   140  					J1:   y,
   141  				}
   142  			}
   143  			y++
   144  		}
   145  		add(op, x, y)
   146  		op = nil
   147  		// equal (diagonal)
   148  		for x < snake[0] {
   149  			x++
   150  			y++
   151  		}
   152  		if x >= M && y >= N {
   153  			break
   154  		}
   155  	}
   156  	return solution[:i]
   157  }
   158  
   159  // backtrack uses the trace for the edit sequence computation and returns the
   160  // "snakes" that make up the solution. A "snake" is a single deletion or
   161  // insertion followed by zero or diagonals.
   162  func backtrack(trace [][]int, x, y, offset int) [][]int {
   163  	snakes := make([][]int, len(trace))
   164  	d := len(trace) - 1
   165  	for ; x > 0 && y > 0 && d > 0; d-- {
   166  		V := trace[d]
   167  		if len(V) == 0 {
   168  			continue
   169  		}
   170  		snakes[d] = []int{x, y}
   171  
   172  		k := x - y
   173  
   174  		var kPrev int
   175  		if k == -d || (k != d && V[k-1+offset] < V[k+1+offset]) {
   176  			kPrev = k + 1
   177  		} else {
   178  			kPrev = k - 1
   179  		}
   180  
   181  		x = V[kPrev+offset]
   182  		y = x - kPrev
   183  	}
   184  	if x < 0 || y < 0 {
   185  		return snakes
   186  	}
   187  	snakes[d] = []int{x, y}
   188  	return snakes
   189  }
   190  
   191  // shortestEditSequence returns the shortest edit sequence that converts a into b.
   192  func shortestEditSequence(a, b []string) ([][]int, int) {
   193  	M, N := len(a), len(b)
   194  	V := make([]int, 2*(N+M)+1)
   195  	offset := N + M
   196  	trace := make([][]int, N+M+1)
   197  
   198  	// Iterate through the maximum possible length of the SES (N+M).
   199  	for d := 0; d <= N+M; d++ {
   200  		copyV := make([]int, len(V))
   201  		// k lines are represented by the equation y = x - k. We move in
   202  		// increments of 2 because end points for even d are on even k lines.
   203  		for k := -d; k <= d; k += 2 {
   204  			// At each point, we either go down or to the right. We go down if
   205  			// k == -d, and we go to the right if k == d. We also prioritize
   206  			// the maximum x value, because we prefer deletions to insertions.
   207  			var x int
   208  			if k == -d || (k != d && V[k-1+offset] < V[k+1+offset]) {
   209  				x = V[k+1+offset] // down
   210  			} else {
   211  				x = V[k-1+offset] + 1 // right
   212  			}
   213  
   214  			y := x - k
   215  
   216  			// Diagonal moves while we have equal contents.
   217  			for x < M && y < N && a[x] == b[y] {
   218  				x++
   219  				y++
   220  			}
   221  
   222  			V[k+offset] = x
   223  
   224  			// Return if we've exceeded the maximum values.
   225  			if x == M && y == N {
   226  				// Makes sure to save the state of the array before returning.
   227  				copy(copyV, V)
   228  				trace[d] = copyV
   229  				return trace, offset
   230  			}
   231  		}
   232  
   233  		// Save the state of the array.
   234  		copy(copyV, V)
   235  		trace[d] = copyV
   236  	}
   237  	return nil, 0
   238  }
   239  
   240  func splitLines(text string) []string {
   241  	lines := strings.SplitAfter(text, "\n")
   242  	if lines[len(lines)-1] == "" {
   243  		lines = lines[:len(lines)-1]
   244  	}
   245  	return lines
   246  }