github.com/cockroachdb/tools@v0.0.0-20230222021103-a6d27438930d/internal/diff/myers/diff.go (about)

     1  // Copyright 2019 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package myers implements the Myers diff algorithm.
     6  package myers
     7  
     8  import (
     9  	"strings"
    10  
    11  	"golang.org/x/tools/internal/diff"
    12  )
    13  
    14  // Sources:
    15  // https://blog.jcoglan.com/2017/02/17/the-myers-diff-algorithm-part-3/
    16  // https://www.codeproject.com/Articles/42279/%2FArticles%2F42279%2FInvestigating-Myers-diff-algorithm-Part-1-of-2
    17  
    18  func ComputeEdits(before, after string) []diff.Edit {
    19  	beforeLines := splitLines(before)
    20  	ops := operations(beforeLines, splitLines(after))
    21  
    22  	// Build a table mapping line number to offset.
    23  	lineOffsets := make([]int, 0, len(beforeLines)+1)
    24  	total := 0
    25  	for i := range beforeLines {
    26  		lineOffsets = append(lineOffsets, total)
    27  		total += len(beforeLines[i])
    28  	}
    29  	lineOffsets = append(lineOffsets, total) // EOF
    30  
    31  	edits := make([]diff.Edit, 0, len(ops))
    32  	for _, op := range ops {
    33  		start, end := lineOffsets[op.I1], lineOffsets[op.I2]
    34  		switch op.Kind {
    35  		case diff.Delete:
    36  			// Delete: before[I1:I2] is deleted.
    37  			edits = append(edits, diff.Edit{Start: start, End: end})
    38  		case diff.Insert:
    39  			// Insert: after[J1:J2] is inserted at before[I1:I1].
    40  			if content := strings.Join(op.Content, ""); content != "" {
    41  				edits = append(edits, diff.Edit{Start: start, End: end, New: content})
    42  			}
    43  		}
    44  	}
    45  	return edits
    46  }
    47  
    48  type operation struct {
    49  	Kind    diff.OpKind
    50  	Content []string // content from b
    51  	I1, I2  int      // indices of the line in a
    52  	J1      int      // indices of the line in b, J2 implied by len(Content)
    53  }
    54  
    55  // operations returns the list of operations to convert a into b, consolidating
    56  // operations for multiple lines and not including equal lines.
    57  func operations(a, b []string) []*operation {
    58  	if len(a) == 0 && len(b) == 0 {
    59  		return nil
    60  	}
    61  
    62  	trace, offset := shortestEditSequence(a, b)
    63  	snakes := backtrack(trace, len(a), len(b), offset)
    64  
    65  	M, N := len(a), len(b)
    66  
    67  	var i int
    68  	solution := make([]*operation, len(a)+len(b))
    69  
    70  	add := func(op *operation, i2, j2 int) {
    71  		if op == nil {
    72  			return
    73  		}
    74  		op.I2 = i2
    75  		if op.Kind == diff.Insert {
    76  			op.Content = b[op.J1:j2]
    77  		}
    78  		solution[i] = op
    79  		i++
    80  	}
    81  	x, y := 0, 0
    82  	for _, snake := range snakes {
    83  		if len(snake) < 2 {
    84  			continue
    85  		}
    86  		var op *operation
    87  		// delete (horizontal)
    88  		for snake[0]-snake[1] > x-y {
    89  			if op == nil {
    90  				op = &operation{
    91  					Kind: diff.Delete,
    92  					I1:   x,
    93  					J1:   y,
    94  				}
    95  			}
    96  			x++
    97  			if x == M {
    98  				break
    99  			}
   100  		}
   101  		add(op, x, y)
   102  		op = nil
   103  		// insert (vertical)
   104  		for snake[0]-snake[1] < x-y {
   105  			if op == nil {
   106  				op = &operation{
   107  					Kind: diff.Insert,
   108  					I1:   x,
   109  					J1:   y,
   110  				}
   111  			}
   112  			y++
   113  		}
   114  		add(op, x, y)
   115  		op = nil
   116  		// equal (diagonal)
   117  		for x < snake[0] {
   118  			x++
   119  			y++
   120  		}
   121  		if x >= M && y >= N {
   122  			break
   123  		}
   124  	}
   125  	return solution[:i]
   126  }
   127  
   128  // backtrack uses the trace for the edit sequence computation and returns the
   129  // "snakes" that make up the solution. A "snake" is a single deletion or
   130  // insertion followed by zero or diagonals.
   131  func backtrack(trace [][]int, x, y, offset int) [][]int {
   132  	snakes := make([][]int, len(trace))
   133  	d := len(trace) - 1
   134  	for ; x > 0 && y > 0 && d > 0; d-- {
   135  		V := trace[d]
   136  		if len(V) == 0 {
   137  			continue
   138  		}
   139  		snakes[d] = []int{x, y}
   140  
   141  		k := x - y
   142  
   143  		var kPrev int
   144  		if k == -d || (k != d && V[k-1+offset] < V[k+1+offset]) {
   145  			kPrev = k + 1
   146  		} else {
   147  			kPrev = k - 1
   148  		}
   149  
   150  		x = V[kPrev+offset]
   151  		y = x - kPrev
   152  	}
   153  	if x < 0 || y < 0 {
   154  		return snakes
   155  	}
   156  	snakes[d] = []int{x, y}
   157  	return snakes
   158  }
   159  
   160  // shortestEditSequence returns the shortest edit sequence that converts a into b.
   161  func shortestEditSequence(a, b []string) ([][]int, int) {
   162  	M, N := len(a), len(b)
   163  	V := make([]int, 2*(N+M)+1)
   164  	offset := N + M
   165  	trace := make([][]int, N+M+1)
   166  
   167  	// Iterate through the maximum possible length of the SES (N+M).
   168  	for d := 0; d <= N+M; d++ {
   169  		copyV := make([]int, len(V))
   170  		// k lines are represented by the equation y = x - k. We move in
   171  		// increments of 2 because end points for even d are on even k lines.
   172  		for k := -d; k <= d; k += 2 {
   173  			// At each point, we either go down or to the right. We go down if
   174  			// k == -d, and we go to the right if k == d. We also prioritize
   175  			// the maximum x value, because we prefer deletions to insertions.
   176  			var x int
   177  			if k == -d || (k != d && V[k-1+offset] < V[k+1+offset]) {
   178  				x = V[k+1+offset] // down
   179  			} else {
   180  				x = V[k-1+offset] + 1 // right
   181  			}
   182  
   183  			y := x - k
   184  
   185  			// Diagonal moves while we have equal contents.
   186  			for x < M && y < N && a[x] == b[y] {
   187  				x++
   188  				y++
   189  			}
   190  
   191  			V[k+offset] = x
   192  
   193  			// Return if we've exceeded the maximum values.
   194  			if x == M && y == N {
   195  				// Makes sure to save the state of the array before returning.
   196  				copy(copyV, V)
   197  				trace[d] = copyV
   198  				return trace, offset
   199  			}
   200  		}
   201  
   202  		// Save the state of the array.
   203  		copy(copyV, V)
   204  		trace[d] = copyV
   205  	}
   206  	return nil, 0
   207  }
   208  
   209  func splitLines(text string) []string {
   210  	lines := strings.SplitAfter(text, "\n")
   211  	if lines[len(lines)-1] == "" {
   212  		lines = lines[:len(lines)-1]
   213  	}
   214  	return lines
   215  }