github.com/cockroachdb/tools@v0.0.0-20230222021103-a6d27438930d/internal/diff/unified.go (about)

     1  // Copyright 2019 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package diff
     6  
     7  import (
     8  	"fmt"
     9  	"log"
    10  	"strings"
    11  )
    12  
    13  // Unified returns a unified diff of the old and new strings.
    14  // The old and new labels are the names of the old and new files.
    15  // If the strings are equal, it returns the empty string.
    16  func Unified(oldLabel, newLabel, old, new string) string {
    17  	edits := Strings(old, new)
    18  	unified, err := ToUnified(oldLabel, newLabel, old, edits)
    19  	if err != nil {
    20  		// Can't happen: edits are consistent.
    21  		log.Fatalf("internal error in diff.Unified: %v", err)
    22  	}
    23  	return unified
    24  }
    25  
    26  // ToUnified applies the edits to content and returns a unified diff.
    27  // The old and new labels are the names of the content and result files.
    28  // It returns an error if the edits are inconsistent; see ApplyEdits.
    29  func ToUnified(oldLabel, newLabel, content string, edits []Edit) (string, error) {
    30  	u, err := toUnified(oldLabel, newLabel, content, edits)
    31  	if err != nil {
    32  		return "", err
    33  	}
    34  	return u.String(), nil
    35  }
    36  
    37  // unified represents a set of edits as a unified diff.
    38  type unified struct {
    39  	// From is the name of the original file.
    40  	From string
    41  	// To is the name of the modified file.
    42  	To string
    43  	// Hunks is the set of edit hunks needed to transform the file content.
    44  	Hunks []*hunk
    45  }
    46  
    47  // Hunk represents a contiguous set of line edits to apply.
    48  type hunk struct {
    49  	// The line in the original source where the hunk starts.
    50  	FromLine int
    51  	// The line in the original source where the hunk finishes.
    52  	ToLine int
    53  	// The set of line based edits to apply.
    54  	Lines []line
    55  }
    56  
    57  // Line represents a single line operation to apply as part of a Hunk.
    58  type line struct {
    59  	// Kind is the type of line this represents, deletion, insertion or copy.
    60  	Kind OpKind
    61  	// Content is the content of this line.
    62  	// For deletion it is the line being removed, for all others it is the line
    63  	// to put in the output.
    64  	Content string
    65  }
    66  
    67  // OpKind is used to denote the type of operation a line represents.
    68  // TODO(adonovan): hide this once the myers package no longer references it.
    69  type OpKind int
    70  
    71  const (
    72  	// Delete is the operation kind for a line that is present in the input
    73  	// but not in the output.
    74  	Delete OpKind = iota
    75  	// Insert is the operation kind for a line that is new in the output.
    76  	Insert
    77  	// Equal is the operation kind for a line that is the same in the input and
    78  	// output, often used to provide context around edited lines.
    79  	Equal
    80  )
    81  
    82  // String returns a human readable representation of an OpKind. It is not
    83  // intended for machine processing.
    84  func (k OpKind) String() string {
    85  	switch k {
    86  	case Delete:
    87  		return "delete"
    88  	case Insert:
    89  		return "insert"
    90  	case Equal:
    91  		return "equal"
    92  	default:
    93  		panic("unknown operation kind")
    94  	}
    95  }
    96  
    97  const (
    98  	edge = 3
    99  	gap  = edge * 2
   100  )
   101  
   102  // toUnified takes a file contents and a sequence of edits, and calculates
   103  // a unified diff that represents those edits.
   104  func toUnified(fromName, toName string, content string, edits []Edit) (unified, error) {
   105  	u := unified{
   106  		From: fromName,
   107  		To:   toName,
   108  	}
   109  	if len(edits) == 0 {
   110  		return u, nil
   111  	}
   112  	var err error
   113  	edits, err = lineEdits(content, edits) // expand to whole lines
   114  	if err != nil {
   115  		return u, err
   116  	}
   117  	lines := splitLines(content)
   118  	var h *hunk
   119  	last := 0
   120  	toLine := 0
   121  	for _, edit := range edits {
   122  		// Compute the zero-based line numbers of the edit start and end.
   123  		// TODO(adonovan): opt: compute incrementally, avoid O(n^2).
   124  		start := strings.Count(content[:edit.Start], "\n")
   125  		end := strings.Count(content[:edit.End], "\n")
   126  		if edit.End == len(content) && len(content) > 0 && content[len(content)-1] != '\n' {
   127  			end++ // EOF counts as an implicit newline
   128  		}
   129  
   130  		switch {
   131  		case h != nil && start == last:
   132  			//direct extension
   133  		case h != nil && start <= last+gap:
   134  			//within range of previous lines, add the joiners
   135  			addEqualLines(h, lines, last, start)
   136  		default:
   137  			//need to start a new hunk
   138  			if h != nil {
   139  				// add the edge to the previous hunk
   140  				addEqualLines(h, lines, last, last+edge)
   141  				u.Hunks = append(u.Hunks, h)
   142  			}
   143  			toLine += start - last
   144  			h = &hunk{
   145  				FromLine: start + 1,
   146  				ToLine:   toLine + 1,
   147  			}
   148  			// add the edge to the new hunk
   149  			delta := addEqualLines(h, lines, start-edge, start)
   150  			h.FromLine -= delta
   151  			h.ToLine -= delta
   152  		}
   153  		last = start
   154  		for i := start; i < end; i++ {
   155  			h.Lines = append(h.Lines, line{Kind: Delete, Content: lines[i]})
   156  			last++
   157  		}
   158  		if edit.New != "" {
   159  			for _, content := range splitLines(edit.New) {
   160  				h.Lines = append(h.Lines, line{Kind: Insert, Content: content})
   161  				toLine++
   162  			}
   163  		}
   164  	}
   165  	if h != nil {
   166  		// add the edge to the final hunk
   167  		addEqualLines(h, lines, last, last+edge)
   168  		u.Hunks = append(u.Hunks, h)
   169  	}
   170  	return u, nil
   171  }
   172  
   173  func splitLines(text string) []string {
   174  	lines := strings.SplitAfter(text, "\n")
   175  	if lines[len(lines)-1] == "" {
   176  		lines = lines[:len(lines)-1]
   177  	}
   178  	return lines
   179  }
   180  
   181  func addEqualLines(h *hunk, lines []string, start, end int) int {
   182  	delta := 0
   183  	for i := start; i < end; i++ {
   184  		if i < 0 {
   185  			continue
   186  		}
   187  		if i >= len(lines) {
   188  			return delta
   189  		}
   190  		h.Lines = append(h.Lines, line{Kind: Equal, Content: lines[i]})
   191  		delta++
   192  	}
   193  	return delta
   194  }
   195  
   196  // String converts a unified diff to the standard textual form for that diff.
   197  // The output of this function can be passed to tools like patch.
   198  func (u unified) String() string {
   199  	if len(u.Hunks) == 0 {
   200  		return ""
   201  	}
   202  	b := new(strings.Builder)
   203  	fmt.Fprintf(b, "--- %s\n", u.From)
   204  	fmt.Fprintf(b, "+++ %s\n", u.To)
   205  	for _, hunk := range u.Hunks {
   206  		fromCount, toCount := 0, 0
   207  		for _, l := range hunk.Lines {
   208  			switch l.Kind {
   209  			case Delete:
   210  				fromCount++
   211  			case Insert:
   212  				toCount++
   213  			default:
   214  				fromCount++
   215  				toCount++
   216  			}
   217  		}
   218  		fmt.Fprint(b, "@@")
   219  		if fromCount > 1 {
   220  			fmt.Fprintf(b, " -%d,%d", hunk.FromLine, fromCount)
   221  		} else if hunk.FromLine == 1 && fromCount == 0 {
   222  			// Match odd GNU diff -u behavior adding to empty file.
   223  			fmt.Fprintf(b, " -0,0")
   224  		} else {
   225  			fmt.Fprintf(b, " -%d", hunk.FromLine)
   226  		}
   227  		if toCount > 1 {
   228  			fmt.Fprintf(b, " +%d,%d", hunk.ToLine, toCount)
   229  		} else {
   230  			fmt.Fprintf(b, " +%d", hunk.ToLine)
   231  		}
   232  		fmt.Fprint(b, " @@\n")
   233  		for _, l := range hunk.Lines {
   234  			switch l.Kind {
   235  			case Delete:
   236  				fmt.Fprintf(b, "-%s", l.Content)
   237  			case Insert:
   238  				fmt.Fprintf(b, "+%s", l.Content)
   239  			default:
   240  				fmt.Fprintf(b, " %s", l.Content)
   241  			}
   242  			if !strings.HasSuffix(l.Content, "\n") {
   243  				fmt.Fprintf(b, "\n\\ No newline at end of file\n")
   244  			}
   245  		}
   246  	}
   247  	return b.String()
   248  }