github.com/amarpal/go-tools@v0.0.0-20240422043104-40142f59f616/internal/diff/myers/diff.go (about)

     1  // Copyright 2019 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // This code is a modified copy of code in gopls.
     6  // It's not as efficient as it could be, it allocates way too much, the API is sloppy.
     7  // But it's only used to print failed tests, so we don't care.
     8  
     9  package myers
    10  
    11  import (
    12  	"fmt"
    13  	"strings"
    14  )
    15  
    16  // OpKind is used to denote the type of operation a line represents.
    17  type OpKind int
    18  
    19  const (
    20  	// Delete is the operation kind for a line that is present in the input
    21  	// but not in the output.
    22  	Delete OpKind = iota
    23  	// Insert is the operation kind for a line that is new in the output.
    24  	Insert
    25  	// Equal is the operation kind for a line that is the same in the input and
    26  	// output, often used to provide context around edited lines.
    27  	Equal
    28  )
    29  
    30  // String returns a human readable representation of an OpKind. It is not
    31  // intended for machine processing.
    32  func (k OpKind) String() string {
    33  	switch k {
    34  	case Delete:
    35  		return "delete"
    36  	case Insert:
    37  		return "insert"
    38  	case Equal:
    39  		return "equal"
    40  	default:
    41  		panic("unknown operation kind")
    42  	}
    43  }
    44  
    45  // Sources:
    46  // https://blog.jcoglan.com/2017/02/17/the-myers-diff-algorithm-part-3/
// https://www.codeproject.com/Articles/42279/Investigating-Myers-diff-algorithm-Part-1-of-2
    48  
    49  func ComputeEdits(before, after string) []*operation {
    50  	ops := operations(splitLines(before), splitLines(after))
    51  	return ops
    52  }
    53  
    54  type operation struct {
    55  	Kind    OpKind
    56  	Content []string // content from b
    57  	I1, I2  int      // indices of the line in a
    58  	J1      int      // indices of the line in b, J2 implied by len(Content)
    59  }
    60  
    61  func (op *operation) String() string {
    62  	var prefix string
    63  	switch op.Kind {
    64  	case Delete:
    65  		prefix = "-"
    66  	case Insert:
    67  		prefix = "+"
    68  	case Equal:
    69  		prefix = " "
    70  	}
    71  	out := ""
    72  	for _, line := range op.Content {
    73  		out += fmt.Sprintf("%s%s", prefix, line)
    74  	}
    75  	return out
    76  }
    77  
    78  // operations returns the list of operations to convert a into b, consolidating
    79  // operations for multiple lines and not including equal lines.
    80  func operations(a, b []string) []*operation {
    81  	if len(a) == 0 && len(b) == 0 {
    82  		return nil
    83  	}
    84  
    85  	trace, offset := shortestEditSequence(a, b)
    86  	snakes := backtrack(trace, len(a), len(b), offset)
    87  
    88  	M, N := len(a), len(b)
    89  
    90  	var i int
    91  	solution := make([]*operation, len(a)+len(b))
    92  
    93  	add := func(op *operation, i2, j2 int) {
    94  		if op == nil {
    95  			return
    96  		}
    97  		op.I2 = i2
    98  		switch op.Kind {
    99  		case Insert:
   100  			op.Content = b[op.J1:j2]
   101  		case Delete:
   102  			op.Content = a[op.I1:op.I2]
   103  		case Equal:
   104  			op.Content = a[op.I1:op.I2]
   105  		}
   106  		solution[i] = op
   107  		i++
   108  	}
   109  	x, y := 0, 0
   110  	for _, snake := range snakes {
   111  		if len(snake) < 2 {
   112  			continue
   113  		}
   114  		var op *operation
   115  		// delete (horizontal)
   116  		for snake[0]-snake[1] > x-y {
   117  			if op == nil {
   118  				op = &operation{
   119  					Kind: Delete,
   120  					I1:   x,
   121  					J1:   y,
   122  				}
   123  			}
   124  			x++
   125  			if x == M {
   126  				break
   127  			}
   128  		}
   129  		add(op, x, y)
   130  		op = nil
   131  		// insert (vertical)
   132  		for snake[0]-snake[1] < x-y {
   133  			if op == nil {
   134  				op = &operation{
   135  					Kind: Insert,
   136  					I1:   x,
   137  					J1:   y,
   138  				}
   139  			}
   140  			y++
   141  		}
   142  		add(op, x, y)
   143  		op = nil
   144  		// equal (diagonal)
   145  		for x < snake[0] {
   146  			if op == nil {
   147  				op = &operation{
   148  					Kind: Equal,
   149  					I1:   x,
   150  					J1:   y,
   151  				}
   152  			}
   153  			x++
   154  			y++
   155  		}
   156  		add(op, x, y)
   157  		if x >= M && y >= N {
   158  			break
   159  		}
   160  	}
   161  	return solution[:i]
   162  }
   163  
   164  // backtrack uses the trace for the edit sequence computation and returns the
   165  // "snakes" that make up the solution. A "snake" is a single deletion or
   166  // insertion followed by zero or diagonals.
   167  func backtrack(trace [][]int, x, y, offset int) [][]int {
   168  	snakes := make([][]int, len(trace))
   169  	d := len(trace) - 1
   170  	for ; x > 0 && y > 0 && d > 0; d-- {
   171  		V := trace[d]
   172  		if len(V) == 0 {
   173  			continue
   174  		}
   175  		snakes[d] = []int{x, y}
   176  
   177  		k := x - y
   178  
   179  		var kPrev int
   180  		if k == -d || (k != d && V[k-1+offset] < V[k+1+offset]) {
   181  			kPrev = k + 1
   182  		} else {
   183  			kPrev = k - 1
   184  		}
   185  
   186  		x = V[kPrev+offset]
   187  		y = x - kPrev
   188  	}
   189  	if x < 0 || y < 0 {
   190  		return snakes
   191  	}
   192  	snakes[d] = []int{x, y}
   193  	return snakes
   194  }
   195  
   196  // shortestEditSequence returns the shortest edit sequence that converts a into b.
   197  func shortestEditSequence(a, b []string) ([][]int, int) {
   198  	M, N := len(a), len(b)
   199  	V := make([]int, 2*(N+M)+1)
   200  	offset := N + M
   201  	trace := make([][]int, N+M+1)
   202  
   203  	// Iterate through the maximum possible length of the SES (N+M).
   204  	for d := 0; d <= N+M; d++ {
   205  		copyV := make([]int, len(V))
   206  		// k lines are represented by the equation y = x - k. We move in
   207  		// increments of 2 because end points for even d are on even k lines.
   208  		for k := -d; k <= d; k += 2 {
   209  			// At each point, we either go down or to the right. We go down if
   210  			// k == -d, and we go to the right if k == d. We also prioritize
   211  			// the maximum x value, because we prefer deletions to insertions.
   212  			var x int
   213  			if k == -d || (k != d && V[k-1+offset] < V[k+1+offset]) {
   214  				x = V[k+1+offset] // down
   215  			} else {
   216  				x = V[k-1+offset] + 1 // right
   217  			}
   218  
   219  			y := x - k
   220  
   221  			// Diagonal moves while we have equal contents.
   222  			for x < M && y < N && a[x] == b[y] {
   223  				x++
   224  				y++
   225  			}
   226  
   227  			V[k+offset] = x
   228  
   229  			// Return if we've exceeded the maximum values.
   230  			if x == M && y == N {
   231  				// Makes sure to save the state of the array before returning.
   232  				copy(copyV, V)
   233  				trace[d] = copyV
   234  				return trace, offset
   235  			}
   236  		}
   237  
   238  		// Save the state of the array.
   239  		copy(copyV, V)
   240  		trace[d] = copyV
   241  	}
   242  	return nil, 0
   243  }
   244  
   245  func splitLines(text string) []string {
   246  	lines := strings.SplitAfter(text, "\n")
   247  	if lines[len(lines)-1] == "" {
   248  		lines = lines[:len(lines)-1]
   249  	}
   250  	return lines
   251  }