// File: github.com/amarpal/go-tools@v0.0.0-20240422043104-40142f59f616/internal/diff/myers/diff.go

// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// This code is a modified copy of code in gopls.
// It's not as efficient as it could be, it allocates way too much, the API is sloppy.
// But it's only used to print failed tests, so we don't care.

package myers

import (
	"fmt"
	"strings"
)

// OpKind is used to denote the type of operation a line represents.
// Its zero value is Delete.
type OpKind int

const (
	// Delete is the operation kind for a line that is present in the input
	// but not in the output.
	Delete OpKind = iota
	// Insert is the operation kind for a line that is new in the output.
	Insert
	// Equal is the operation kind for a line that is the same in the input and
	// output, often used to provide context around edited lines.
	Equal
)

// String returns a human readable representation of an OpKind. It is not
// intended for machine processing. It panics if the receiver is not one of
// Delete, Insert or Equal.
32 func (k OpKind) String() string { 33 switch k { 34 case Delete: 35 return "delete" 36 case Insert: 37 return "insert" 38 case Equal: 39 return "equal" 40 default: 41 panic("unknown operation kind") 42 } 43 } 44 45 // Sources: 46 // https://blog.jcoglan.com/2017/02/17/the-myers-diff-algorithm-part-3/ 47 // https://www.codeproject.com/Articles/42279/%2FArticles%2F42279%2FInvestigating-Myers-diff-algorithm-Part-1-of-2 48 49 func ComputeEdits(before, after string) []*operation { 50 ops := operations(splitLines(before), splitLines(after)) 51 return ops 52 } 53 54 type operation struct { 55 Kind OpKind 56 Content []string // content from b 57 I1, I2 int // indices of the line in a 58 J1 int // indices of the line in b, J2 implied by len(Content) 59 } 60 61 func (op *operation) String() string { 62 var prefix string 63 switch op.Kind { 64 case Delete: 65 prefix = "-" 66 case Insert: 67 prefix = "+" 68 case Equal: 69 prefix = " " 70 } 71 out := "" 72 for _, line := range op.Content { 73 out += fmt.Sprintf("%s%s", prefix, line) 74 } 75 return out 76 } 77 78 // operations returns the list of operations to convert a into b, consolidating 79 // operations for multiple lines and not including equal lines. 
80 func operations(a, b []string) []*operation { 81 if len(a) == 0 && len(b) == 0 { 82 return nil 83 } 84 85 trace, offset := shortestEditSequence(a, b) 86 snakes := backtrack(trace, len(a), len(b), offset) 87 88 M, N := len(a), len(b) 89 90 var i int 91 solution := make([]*operation, len(a)+len(b)) 92 93 add := func(op *operation, i2, j2 int) { 94 if op == nil { 95 return 96 } 97 op.I2 = i2 98 switch op.Kind { 99 case Insert: 100 op.Content = b[op.J1:j2] 101 case Delete: 102 op.Content = a[op.I1:op.I2] 103 case Equal: 104 op.Content = a[op.I1:op.I2] 105 } 106 solution[i] = op 107 i++ 108 } 109 x, y := 0, 0 110 for _, snake := range snakes { 111 if len(snake) < 2 { 112 continue 113 } 114 var op *operation 115 // delete (horizontal) 116 for snake[0]-snake[1] > x-y { 117 if op == nil { 118 op = &operation{ 119 Kind: Delete, 120 I1: x, 121 J1: y, 122 } 123 } 124 x++ 125 if x == M { 126 break 127 } 128 } 129 add(op, x, y) 130 op = nil 131 // insert (vertical) 132 for snake[0]-snake[1] < x-y { 133 if op == nil { 134 op = &operation{ 135 Kind: Insert, 136 I1: x, 137 J1: y, 138 } 139 } 140 y++ 141 } 142 add(op, x, y) 143 op = nil 144 // equal (diagonal) 145 for x < snake[0] { 146 if op == nil { 147 op = &operation{ 148 Kind: Equal, 149 I1: x, 150 J1: y, 151 } 152 } 153 x++ 154 y++ 155 } 156 add(op, x, y) 157 if x >= M && y >= N { 158 break 159 } 160 } 161 return solution[:i] 162 } 163 164 // backtrack uses the trace for the edit sequence computation and returns the 165 // "snakes" that make up the solution. A "snake" is a single deletion or 166 // insertion followed by zero or diagonals. 
func backtrack(trace [][]int, x, y, offset int) [][]int {
	// trace[d] is the snapshot of V taken at edit depth d, and offset is the
	// index shift that maps a diagonal k to its slot in V. The result has one
	// slot per depth: snakes[d] is the {x, y} end point at that depth, and
	// depths that are never visited stay nil.
	if len(trace) == 0 {
		// Nothing was recorded, so there is no path to walk back. Without
		// this guard d would start at -1 and the final store would panic.
		return nil
	}

	snakes := make([][]int, len(trace))
	d := len(trace) - 1
	for ; x > 0 && y > 0 && d > 0; d-- {
		V := trace[d]
		if len(V) == 0 {
			// Depths beyond the one at which the search finished have no
			// snapshot.
			continue
		}
		snakes[d] = []int{x, y}

		k := x - y

		// Same down/right decision as in shortestEditSequence, used here to
		// find the diagonal we came from.
		var kPrev int
		if k == -d || (k != d && V[k-1+offset] < V[k+1+offset]) {
			kPrev = k + 1
		} else {
			kPrev = k - 1
		}

		x = V[kPrev+offset]
		y = x - kPrev
	}
	if x < 0 || y < 0 {
		return snakes
	}
	snakes[d] = []int{x, y}
	return snakes
}

// shortestEditSequence returns the shortest edit sequence that converts a into b.
//
// The first result is the trace: trace[d] is a copy of the V array (furthest
// x reached on each diagonal) after processing edit depth d; depths past the
// one at which the end was reached are left nil. The second result is the
// offset to add to a diagonal k to index into V.
func shortestEditSequence(a, b []string) ([][]int, int) {
	M, N := len(a), len(b)
	if M+N == 0 {
		// Two empty inputs need zero edits. Handle them up front: V would
		// have a single slot and the d == 0 step below reads V[offset+1],
		// which would be out of range.
		return [][]int{{0}}, 0
	}

	V := make([]int, 2*(N+M)+1)
	offset := N + M
	trace := make([][]int, N+M+1)

	// Iterate through the maximum possible length of the SES (N+M).
	for d := 0; d <= N+M; d++ {
		reachedEnd := false
		// k lines are represented by the equation y = x - k. We move in
		// increments of 2 because end points for even d are on even k lines.
		for k := -d; k <= d; k += 2 {
			// At each point, we either go down or to the right. We go down if
			// k == -d, and we go to the right if k == d. We also prioritize
			// the maximum x value, because we prefer deletions to insertions.
			var x int
			if k == -d || (k != d && V[k-1+offset] < V[k+1+offset]) {
				x = V[k+1+offset] // down
			} else {
				x = V[k-1+offset] + 1 // right
			}

			y := x - k

			// Diagonal moves while we have equal contents.
			for x < M && y < N && a[x] == b[y] {
				x++
				y++
			}

			V[k+offset] = x

			// Stop as soon as the end of both inputs has been reached.
			if x == M && y == N {
				reachedEnd = true
				break
			}
		}

		// Save the state of the array for this depth; backtrack needs it.
		snapshot := make([]int, len(V))
		copy(snapshot, V)
		trace[d] = snapshot

		if reachedEnd {
			return trace, offset
		}
	}
	// Not expected to be reached: deleting all of a and inserting all of b
	// takes N+M edits, so the end is found at d == N+M at the latest.
	return nil, 0
}

// splitLines splits text after every "\n", so each returned line keeps its
// terminator. A final line without a terminator is kept as is, and the empty
// trailing element produced when text is empty or ends in "\n" is dropped.
func splitLines(text string) []string {
	lines := strings.SplitAfter(text, "\n")
	if last := len(lines) - 1; lines[last] == "" {
		lines = lines[:last]
	}
	return lines
}