golang.org/x/tools@v0.21.1-0.20240520172518-788d39e776b1/internal/diff/ndiff.go (about)

     1  // Copyright 2022 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package diff
     6  
     7  import (
     8  	"bytes"
     9  	"unicode/utf8"
    10  
    11  	"golang.org/x/tools/internal/diff/lcs"
    12  )
    13  
    14  // Strings computes the differences between two strings.
    15  // The resulting edits respect rune boundaries.
    16  func Strings(before, after string) []Edit {
    17  	if before == after {
    18  		return nil // common case
    19  	}
    20  
    21  	if isASCII(before) && isASCII(after) {
    22  		// TODO(adonovan): opt: specialize diffASCII for strings.
    23  		return diffASCII([]byte(before), []byte(after))
    24  	}
    25  	return diffRunes([]rune(before), []rune(after))
    26  }
    27  
    28  // Bytes computes the differences between two byte slices.
    29  // The resulting edits respect rune boundaries.
    30  func Bytes(before, after []byte) []Edit {
    31  	if bytes.Equal(before, after) {
    32  		return nil // common case
    33  	}
    34  
    35  	if isASCII(before) && isASCII(after) {
    36  		return diffASCII(before, after)
    37  	}
    38  	return diffRunes(runes(before), runes(after))
    39  }
    40  
    41  func diffASCII(before, after []byte) []Edit {
    42  	diffs := lcs.DiffBytes(before, after)
    43  
    44  	// Convert from LCS diffs.
    45  	res := make([]Edit, len(diffs))
    46  	for i, d := range diffs {
    47  		res[i] = Edit{d.Start, d.End, string(after[d.ReplStart:d.ReplEnd])}
    48  	}
    49  	return res
    50  }
    51  
    52  func diffRunes(before, after []rune) []Edit {
    53  	diffs := lcs.DiffRunes(before, after)
    54  
    55  	// The diffs returned by the lcs package use indexes
    56  	// into whatever slice was passed in.
    57  	// Convert rune offsets to byte offsets.
    58  	res := make([]Edit, len(diffs))
    59  	lastEnd := 0
    60  	utf8Len := 0
    61  	for i, d := range diffs {
    62  		utf8Len += runesLen(before[lastEnd:d.Start]) // text between edits
    63  		start := utf8Len
    64  		utf8Len += runesLen(before[d.Start:d.End]) // text deleted by this edit
    65  		res[i] = Edit{start, utf8Len, string(after[d.ReplStart:d.ReplEnd])}
    66  		lastEnd = d.End
    67  	}
    68  	return res
    69  }
    70  
    71  // runes is like []rune(string(bytes)) without the duplicate allocation.
    72  func runes(bytes []byte) []rune {
    73  	n := utf8.RuneCount(bytes)
    74  	runes := make([]rune, n)
    75  	for i := 0; i < n; i++ {
    76  		r, sz := utf8.DecodeRune(bytes)
    77  		bytes = bytes[sz:]
    78  		runes[i] = r
    79  	}
    80  	return runes
    81  }
    82  
    83  // runesLen returns the length in bytes of the UTF-8 encoding of runes.
    84  func runesLen(runes []rune) (len int) {
    85  	for _, r := range runes {
    86  		len += utf8.RuneLen(r)
    87  	}
    88  	return len
    89  }
    90  
    91  // isASCII reports whether s contains only ASCII.
    92  func isASCII[S string | []byte](s S) bool {
    93  	for i := 0; i < len(s); i++ {
    94  		if s[i] >= utf8.RuneSelf {
    95  			return false
    96  		}
    97  	}
    98  	return true
    99  }