github.com/cockroachdb/tools@v0.0.0-20230222021103-a6d27438930d/internal/diff/myers/diff.go (about) 1 // Copyright 2019 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package myers implements the Myers diff algorithm. 6 package myers 7 8 import ( 9 "strings" 10 11 "golang.org/x/tools/internal/diff" 12 ) 13 14 // Sources: 15 // https://blog.jcoglan.com/2017/02/17/the-myers-diff-algorithm-part-3/ 16 // https://www.codeproject.com/Articles/42279/%2FArticles%2F42279%2FInvestigating-Myers-diff-algorithm-Part-1-of-2 17 18 func ComputeEdits(before, after string) []diff.Edit { 19 beforeLines := splitLines(before) 20 ops := operations(beforeLines, splitLines(after)) 21 22 // Build a table mapping line number to offset. 23 lineOffsets := make([]int, 0, len(beforeLines)+1) 24 total := 0 25 for i := range beforeLines { 26 lineOffsets = append(lineOffsets, total) 27 total += len(beforeLines[i]) 28 } 29 lineOffsets = append(lineOffsets, total) // EOF 30 31 edits := make([]diff.Edit, 0, len(ops)) 32 for _, op := range ops { 33 start, end := lineOffsets[op.I1], lineOffsets[op.I2] 34 switch op.Kind { 35 case diff.Delete: 36 // Delete: before[I1:I2] is deleted. 37 edits = append(edits, diff.Edit{Start: start, End: end}) 38 case diff.Insert: 39 // Insert: after[J1:J2] is inserted at before[I1:I1]. 40 if content := strings.Join(op.Content, ""); content != "" { 41 edits = append(edits, diff.Edit{Start: start, End: end, New: content}) 42 } 43 } 44 } 45 return edits 46 } 47 48 type operation struct { 49 Kind diff.OpKind 50 Content []string // content from b 51 I1, I2 int // indices of the line in a 52 J1 int // indices of the line in b, J2 implied by len(Content) 53 } 54 55 // operations returns the list of operations to convert a into b, consolidating 56 // operations for multiple lines and not including equal lines. 
57 func operations(a, b []string) []*operation { 58 if len(a) == 0 && len(b) == 0 { 59 return nil 60 } 61 62 trace, offset := shortestEditSequence(a, b) 63 snakes := backtrack(trace, len(a), len(b), offset) 64 65 M, N := len(a), len(b) 66 67 var i int 68 solution := make([]*operation, len(a)+len(b)) 69 70 add := func(op *operation, i2, j2 int) { 71 if op == nil { 72 return 73 } 74 op.I2 = i2 75 if op.Kind == diff.Insert { 76 op.Content = b[op.J1:j2] 77 } 78 solution[i] = op 79 i++ 80 } 81 x, y := 0, 0 82 for _, snake := range snakes { 83 if len(snake) < 2 { 84 continue 85 } 86 var op *operation 87 // delete (horizontal) 88 for snake[0]-snake[1] > x-y { 89 if op == nil { 90 op = &operation{ 91 Kind: diff.Delete, 92 I1: x, 93 J1: y, 94 } 95 } 96 x++ 97 if x == M { 98 break 99 } 100 } 101 add(op, x, y) 102 op = nil 103 // insert (vertical) 104 for snake[0]-snake[1] < x-y { 105 if op == nil { 106 op = &operation{ 107 Kind: diff.Insert, 108 I1: x, 109 J1: y, 110 } 111 } 112 y++ 113 } 114 add(op, x, y) 115 op = nil 116 // equal (diagonal) 117 for x < snake[0] { 118 x++ 119 y++ 120 } 121 if x >= M && y >= N { 122 break 123 } 124 } 125 return solution[:i] 126 } 127 128 // backtrack uses the trace for the edit sequence computation and returns the 129 // "snakes" that make up the solution. A "snake" is a single deletion or 130 // insertion followed by zero or diagonals. 
func backtrack(trace [][]int, x, y, offset int) [][]int {
	// An empty trace has no rounds to walk back through. Without this guard
	// the final snakes[d] store below would index at -1 and panic.
	if len(trace) == 0 {
		return nil
	}

	// snakes[d] holds the (x, y) position recorded for round d. Rounds that
	// are skipped or never reached stay nil.
	snakes := make([][]int, len(trace))
	d := len(trace) - 1
	for ; x > 0 && y > 0 && d > 0; d-- {
		V := trace[d]
		if len(V) == 0 {
			// Round d was never recorded by shortestEditSequence.
			continue
		}
		snakes[d] = []int{x, y}

		k := x - y

		// Same down/right decision as in shortestEditSequence, used here to
		// find the diagonal the path was on before round d.
		var kPrev int
		if k == -d || (k != d && V[k-1+offset] < V[k+1+offset]) {
			kPrev = k + 1
		} else {
			kPrev = k - 1
		}

		x = V[kPrev+offset]
		y = x - kPrev
	}
	if x < 0 || y < 0 {
		return snakes
	}
	snakes[d] = []int{x, y}
	return snakes
}

// shortestEditSequence returns the shortest edit sequence that converts a into b.
//
// The first result is the trace: trace[d] is a snapshot of V (the x value
// stored per diagonal k, at index k+offset) taken at the end of round d.
// Entries for rounds after the one that reached the end of both inputs are
// left nil. The second result is the offset used to index V.
func shortestEditSequence(a, b []string) ([][]int, int) {
	M, N := len(a), len(b)
	if M == 0 && N == 0 {
		// With no lines at all V would have a single slot, and the d == 0
		// step below reads V[k+1+offset], i.e. V[1], which is out of range.
		// The answer is trivially "already at the end" after round 0.
		return [][]int{{0}}, 0
	}

	V := make([]int, 2*(N+M)+1)
	offset := N + M
	trace := make([][]int, N+M+1)

	// Iterate through the maximum possible length of the SES (N+M).
	for d := 0; d <= N+M; d++ {
		done := false
		// k lines are represented by the equation y = x - k. We move in
		// increments of 2 because end points for even d are on even k lines.
		for k := -d; k <= d; k += 2 {
			// At each point, we either go down or to the right. We go down if
			// k == -d, and we go to the right if k == d. We also prioritize
			// the maximum x value, because we prefer deletions to insertions.
			var x int
			if k == -d || (k != d && V[k-1+offset] < V[k+1+offset]) {
				x = V[k+1+offset] // down
			} else {
				x = V[k-1+offset] + 1 // right
			}

			y := x - k

			// Diagonal moves while we have equal contents.
			for x < M && y < N && a[x] == b[y] {
				x++
				y++
			}

			V[k+offset] = x

			// Stop as soon as the end of both inputs has been reached.
			if x == M && y == N {
				done = true
				break
			}
		}

		// Save the state of the array for this round (also on the final
		// round, so backtrack can start from it).
		snapshot := make([]int, len(V))
		copy(snapshot, V)
		trace[d] = snapshot

		if done {
			return trace, offset
		}
	}
	return nil, 0
}

// splitLines splits text after every "\n", so each line keeps its newline.
// The empty trailing element produced when text is empty or ends in "\n" is
// dropped.
func splitLines(text string) []string {
	// SplitAfter with a non-empty separator always returns at least one
	// element, so indexing the last element is safe.
	lines := strings.SplitAfter(text, "\n")
	if lines[len(lines)-1] == "" {
		lines = lines[:len(lines)-1]
	}
	return lines
}