golang.org/x/tools@v0.21.1-0.20240520172518-788d39e776b1/internal/diff/ndiff.go (about) 1 // Copyright 2022 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package diff 6 7 import ( 8 "bytes" 9 "unicode/utf8" 10 11 "golang.org/x/tools/internal/diff/lcs" 12 ) 13 14 // Strings computes the differences between two strings. 15 // The resulting edits respect rune boundaries. 16 func Strings(before, after string) []Edit { 17 if before == after { 18 return nil // common case 19 } 20 21 if isASCII(before) && isASCII(after) { 22 // TODO(adonovan): opt: specialize diffASCII for strings. 23 return diffASCII([]byte(before), []byte(after)) 24 } 25 return diffRunes([]rune(before), []rune(after)) 26 } 27 28 // Bytes computes the differences between two byte slices. 29 // The resulting edits respect rune boundaries. 30 func Bytes(before, after []byte) []Edit { 31 if bytes.Equal(before, after) { 32 return nil // common case 33 } 34 35 if isASCII(before) && isASCII(after) { 36 return diffASCII(before, after) 37 } 38 return diffRunes(runes(before), runes(after)) 39 } 40 41 func diffASCII(before, after []byte) []Edit { 42 diffs := lcs.DiffBytes(before, after) 43 44 // Convert from LCS diffs. 45 res := make([]Edit, len(diffs)) 46 for i, d := range diffs { 47 res[i] = Edit{d.Start, d.End, string(after[d.ReplStart:d.ReplEnd])} 48 } 49 return res 50 } 51 52 func diffRunes(before, after []rune) []Edit { 53 diffs := lcs.DiffRunes(before, after) 54 55 // The diffs returned by the lcs package use indexes 56 // into whatever slice was passed in. 57 // Convert rune offsets to byte offsets. 58 res := make([]Edit, len(diffs)) 59 lastEnd := 0 60 utf8Len := 0 61 for i, d := range diffs { 62 utf8Len += runesLen(before[lastEnd:d.Start]) // text between edits 63 start := utf8Len 64 utf8Len += runesLen(before[d.Start:d.End]) // text deleted by this edit 65 res[i] = Edit{start, utf8Len, string(after[d.ReplStart:d.ReplEnd])} 66 lastEnd = d.End 67 } 68 return res 69 } 70 71 // runes is like []rune(string(bytes)) without the duplicate allocation. 72 func runes(bytes []byte) []rune { 73 n := utf8.RuneCount(bytes) 74 runes := make([]rune, n) 75 for i := 0; i < n; i++ { 76 r, sz := utf8.DecodeRune(bytes) 77 bytes = bytes[sz:] 78 runes[i] = r 79 } 80 return runes 81 } 82 83 // runesLen returns the length in bytes of the UTF-8 encoding of runes. 84 func runesLen(runes []rune) (len int) { 85 for _, r := range runes { 86 len += utf8.RuneLen(r) 87 } 88 return len 89 } 90 91 // isASCII reports whether s contains only ASCII. 92 func isASCII[S string | []byte](s S) bool { 93 for i := 0; i < len(s); i++ { 94 if s[i] >= utf8.RuneSelf { 95 return false 96 } 97 } 98 return true 99 }