github.com/v2pro/plz@v0.0.0-20221028024117-e5f9aec5b631/test/go-difflib/difflib/difflib.go (about)

     1  // Package difflib is a partial port of Python difflib module.
     2  //
     3  // It provides tools to compare sequences of strings and generate textual diffs.
     4  //
     5  // The following class and functions have been ported:
     6  //
     7  // - SequenceMatcher
     8  //
     9  // - unified_diff
    10  //
    11  // - context_diff
    12  //
    13  // Getting unified diffs was the main goal of the port. Keep in mind this code
    14  // is mostly suitable to output text differences in a human friendly way, there
    15  // are no guarantees generated diffs are consumable by patch(1).
    16  package difflib
    17  
    18  import (
    19  	"bufio"
    20  	"bytes"
    21  	"fmt"
    22  	"io"
    23  	"strings"
    24  )
    25  
    26  func min(a, b int) int {
    27  	if a < b {
    28  		return a
    29  	}
    30  	return b
    31  }
    32  
    33  func max(a, b int) int {
    34  	if a > b {
    35  		return a
    36  	}
    37  	return b
    38  }
    39  
    40  func calculateRatio(matches, length int) float64 {
    41  	if length > 0 {
    42  		return 2.0 * float64(matches) / float64(length)
    43  	}
    44  	return 1.0
    45  }
    46  
    47  type Match struct {
    48  	A    int
    49  	B    int
    50  	Size int
    51  }
    52  
    53  type OpCode struct {
    54  	Tag byte
    55  	I1  int
    56  	I2  int
    57  	J1  int
    58  	J2  int
    59  }
    60  
    61  // SequenceMatcher compares sequence of strings. The basic
    62  // algorithm predates, and is a little fancier than, an algorithm
    63  // published in the late 1980's by Ratcliff and Obershelp under the
    64  // hyperbolic name "gestalt pattern matching".  The basic idea is to find
    65  // the longest contiguous matching subsequence that contains no "junk"
    66  // elements (R-O doesn't address junk).  The same idea is then applied
    67  // recursively to the pieces of the sequences to the left and to the right
    68  // of the matching subsequence.  This does not yield minimal edit
    69  // sequences, but does tend to yield matches that "look right" to people.
    70  //
    71  // SequenceMatcher tries to compute a "human-friendly diff" between two
    72  // sequences.  Unlike e.g. UNIX(tm) diff, the fundamental notion is the
    73  // longest *contiguous* & junk-free matching subsequence.  That's what
    74  // catches peoples' eyes.  The Windows(tm) windiff has another interesting
    75  // notion, pairing up elements that appear uniquely in each sequence.
    76  // That, and the method here, appear to yield more intuitive difference
    77  // reports than does diff.  This method appears to be the least vulnerable
    78  // to synching up on blocks of "junk lines", though (like blank lines in
    79  // ordinary text files, or maybe "<P>" lines in HTML files).  That may be
    80  // because this is the only method of the 3 that has a *concept* of
    81  // "junk" <wink>.
    82  //
    83  // Timing:  Basic R-O is cubic time worst case and quadratic time expected
    84  // case.  SequenceMatcher is quadratic time for the worst case and has
    85  // expected-case behavior dependent in a complicated way on how many
    86  // elements the sequences have in common; best case time is linear.
    87  type SequenceMatcher struct {
    88  	a              []string
    89  	b              []string
    90  	b2j            map[string][]int
    91  	IsJunk         func(string) bool
    92  	autoJunk       bool
    93  	bJunk          map[string]struct{}
    94  	matchingBlocks []Match
    95  	fullBCount     map[string]int
    96  	bPopular       map[string]struct{}
    97  	opCodes        []OpCode
    98  }
    99  
   100  func NewMatcher(a, b []string) *SequenceMatcher {
   101  	m := SequenceMatcher{autoJunk: true}
   102  	m.SetSeqs(a, b)
   103  	return &m
   104  }
   105  
   106  func NewMatcherWithJunk(a, b []string, autoJunk bool,
   107  	isJunk func(string) bool) *SequenceMatcher {
   108  
   109  	m := SequenceMatcher{IsJunk: isJunk, autoJunk: autoJunk}
   110  	m.SetSeqs(a, b)
   111  	return &m
   112  }
   113  
   114  // SetIndex two sequences to be compared.
   115  func (m *SequenceMatcher) SetSeqs(a, b []string) {
   116  	m.SetSeq1(a)
   117  	m.SetSeq2(b)
   118  }
   119  
   120  // SetIndex the first sequence to be compared. The second sequence to be compared is
   121  // not changed.
   122  //
   123  // SequenceMatcher computes and caches detailed information about the second
   124  // sequence, so if you want to compare one sequence S against many sequences,
   125  // use .SetSeq2(s) once and call .SetSeq1(x) repeatedly for each of the other
   126  // sequences.
   127  //
   128  // See also SetSeqs() and SetSeq2().
   129  func (m *SequenceMatcher) SetSeq1(a []string) {
   130  	if &a == &m.a {
   131  		return
   132  	}
   133  	m.a = a
   134  	m.matchingBlocks = nil
   135  	m.opCodes = nil
   136  }
   137  
   138  // SetIndex the second sequence to be compared. The first sequence to be compared is
   139  // not changed.
   140  func (m *SequenceMatcher) SetSeq2(b []string) {
   141  	if &b == &m.b {
   142  		return
   143  	}
   144  	m.b = b
   145  	m.matchingBlocks = nil
   146  	m.opCodes = nil
   147  	m.fullBCount = nil
   148  	m.chainB()
   149  }
   150  
   151  func (m *SequenceMatcher) chainB() {
   152  	// Populate line -> index mapping
   153  	b2j := map[string][]int{}
   154  	for i, s := range m.b {
   155  		indices := b2j[s]
   156  		indices = append(indices, i)
   157  		b2j[s] = indices
   158  	}
   159  
   160  	// Purge junk elements
   161  	m.bJunk = map[string]struct{}{}
   162  	if m.IsJunk != nil {
   163  		junk := m.bJunk
   164  		for s, _ := range b2j {
   165  			if m.IsJunk(s) {
   166  				junk[s] = struct{}{}
   167  			}
   168  		}
   169  		for s, _ := range junk {
   170  			delete(b2j, s)
   171  		}
   172  	}
   173  
   174  	// Purge remaining popular elements
   175  	popular := map[string]struct{}{}
   176  	n := len(m.b)
   177  	if m.autoJunk && n >= 200 {
   178  		ntest := n/100 + 1
   179  		for s, indices := range b2j {
   180  			if len(indices) > ntest {
   181  				popular[s] = struct{}{}
   182  			}
   183  		}
   184  		for s, _ := range popular {
   185  			delete(b2j, s)
   186  		}
   187  	}
   188  	m.bPopular = popular
   189  	m.b2j = b2j
   190  }
   191  
   192  func (m *SequenceMatcher) isBJunk(s string) bool {
   193  	_, ok := m.bJunk[s]
   194  	return ok
   195  }
   196  
   197  // Find longest matching block in a[alo:ahi] and b[blo:bhi].
   198  //
   199  // If IsJunk is not defined:
   200  //
   201  // Return (i,j,k) such that a[i:i+k] is equal to b[j:j+k], where
   202  //     alo <= i <= i+k <= ahi
   203  //     blo <= j <= j+k <= bhi
   204  // and for all (i',j',k') meeting those conditions,
   205  //     k >= k'
   206  //     i <= i'
   207  //     and if i == i', j <= j'
   208  //
   209  // In other words, of all maximal matching blocks, return one that
   210  // starts earliest in a, and of all those maximal matching blocks that
   211  // start earliest in a, return the one that starts earliest in b.
   212  //
   213  // If IsJunk is defined, first the longest matching block is
   214  // determined as above, but with the additional restriction that no
   215  // junk element appears in the block.  Then that block is extended as
   216  // far as possible by matching (only) junk elements on both sides.  So
   217  // the resulting block never matches on junk except as identical junk
   218  // happens to be adjacent to an "interesting" match.
   219  //
   220  // If no blocks match, return (alo, blo, 0).
   221  func (m *SequenceMatcher) findLongestMatch(alo, ahi, blo, bhi int) Match {
   222  	// CAUTION:  stripping common prefix or suffix would be incorrect.
   223  	// E.g.,
   224  	//    ab
   225  	//    acab
   226  	// Longest matching block is "ab", but if common prefix is
   227  	// stripped, it's "a" (tied with "b").  UNIX(tm) diff does so
   228  	// strip, so ends up claiming that ab is changed to acab by
   229  	// inserting "ca" in the middle.  That's minimal but unintuitive:
   230  	// "it's obvious" that someone inserted "ac" at the front.
   231  	// Windiff ends up at the same place as diff, but by pairing up
   232  	// the unique 'b's and then matching the first two 'a's.
   233  	besti, bestj, bestsize := alo, blo, 0
   234  
   235  	// find longest junk-free match
   236  	// during an iteration of the loop, j2len[j] = length of longest
   237  	// junk-free match ending with a[i-1] and b[j]
   238  	j2len := map[int]int{}
   239  	for i := alo; i != ahi; i++ {
   240  		// look at all instances of a[i] in b; note that because
   241  		// b2j has no junk keys, the loop is skipped if a[i] is junk
   242  		newj2len := map[int]int{}
   243  		for _, j := range m.b2j[m.a[i]] {
   244  			// a[i] matches b[j]
   245  			if j < blo {
   246  				continue
   247  			}
   248  			if j >= bhi {
   249  				break
   250  			}
   251  			k := j2len[j-1] + 1
   252  			newj2len[j] = k
   253  			if k > bestsize {
   254  				besti, bestj, bestsize = i-k+1, j-k+1, k
   255  			}
   256  		}
   257  		j2len = newj2len
   258  	}
   259  
   260  	// Extend the best by non-junk elements on each end.  In particular,
   261  	// "popular" non-junk elements aren't in b2j, which greatly speeds
   262  	// the inner loop above, but also means "the best" match so far
   263  	// doesn't contain any junk *or* popular non-junk elements.
   264  	for besti > alo && bestj > blo && !m.isBJunk(m.b[bestj-1]) &&
   265  		m.a[besti-1] == m.b[bestj-1] {
   266  		besti, bestj, bestsize = besti-1, bestj-1, bestsize+1
   267  	}
   268  	for besti+bestsize < ahi && bestj+bestsize < bhi &&
   269  		!m.isBJunk(m.b[bestj+bestsize]) &&
   270  		m.a[besti+bestsize] == m.b[bestj+bestsize] {
   271  		bestsize += 1
   272  	}
   273  
   274  	// Now that we have a wholly interesting match (albeit possibly
   275  	// empty!), we may as well suck up the matching junk on each
   276  	// side of it too.  Can't think of a good reason not to, and it
   277  	// saves post-processing the (possibly considerable) expense of
   278  	// figuring out what to do with it.  In the case of an empty
   279  	// interesting match, this is clearly the right thing to do,
   280  	// because no other kind of match is possible in the regions.
   281  	for besti > alo && bestj > blo && m.isBJunk(m.b[bestj-1]) &&
   282  		m.a[besti-1] == m.b[bestj-1] {
   283  		besti, bestj, bestsize = besti-1, bestj-1, bestsize+1
   284  	}
   285  	for besti+bestsize < ahi && bestj+bestsize < bhi &&
   286  		m.isBJunk(m.b[bestj+bestsize]) &&
   287  		m.a[besti+bestsize] == m.b[bestj+bestsize] {
   288  		bestsize += 1
   289  	}
   290  
   291  	return Match{A: besti, B: bestj, Size: bestsize}
   292  }
   293  
   294  // Return list of triples describing matching subsequences.
   295  //
   296  // Each triple is of the form (i, j, n), and means that
   297  // a[i:i+n] == b[j:j+n].  The triples are monotonically increasing in
   298  // i and in j. It's also guaranteed that if (i, j, n) and (i', j', n') are
   299  // adjacent triples in the list, and the second is not the last triple in the
   300  // list, then i+n != i' or j+n != j'. IOW, adjacent triples never describe
   301  // adjacent equal blocks.
   302  //
   303  // The last triple is a dummy, (len(a), len(b), 0), and is the only
   304  // triple with n==0.
   305  func (m *SequenceMatcher) GetMatchingBlocks() []Match {
   306  	if m.matchingBlocks != nil {
   307  		return m.matchingBlocks
   308  	}
   309  
   310  	var matchBlocks func(alo, ahi, blo, bhi int, matched []Match) []Match
   311  	matchBlocks = func(alo, ahi, blo, bhi int, matched []Match) []Match {
   312  		match := m.findLongestMatch(alo, ahi, blo, bhi)
   313  		i, j, k := match.A, match.B, match.Size
   314  		if match.Size > 0 {
   315  			if alo < i && blo < j {
   316  				matched = matchBlocks(alo, i, blo, j, matched)
   317  			}
   318  			matched = append(matched, match)
   319  			if i+k < ahi && j+k < bhi {
   320  				matched = matchBlocks(i+k, ahi, j+k, bhi, matched)
   321  			}
   322  		}
   323  		return matched
   324  	}
   325  	matched := matchBlocks(0, len(m.a), 0, len(m.b), nil)
   326  
   327  	// It's possible that we have adjacent equal blocks in the
   328  	// matching_blocks list now.
   329  	nonAdjacent := []Match{}
   330  	i1, j1, k1 := 0, 0, 0
   331  	for _, b := range matched {
   332  		// Is this block adjacent to i1, j1, k1?
   333  		i2, j2, k2 := b.A, b.B, b.Size
   334  		if i1+k1 == i2 && j1+k1 == j2 {
   335  			// Yes, so collapse them -- this just increases the length of
   336  			// the first block by the length of the second, and the first
   337  			// block so lengthened remains the block to compare against.
   338  			k1 += k2
   339  		} else {
   340  			// Not adjacent.  Remember the first block (k1==0 means it's
   341  			// the dummy we started with), and make the second block the
   342  			// new block to compare against.
   343  			if k1 > 0 {
   344  				nonAdjacent = append(nonAdjacent, Match{i1, j1, k1})
   345  			}
   346  			i1, j1, k1 = i2, j2, k2
   347  		}
   348  	}
   349  	if k1 > 0 {
   350  		nonAdjacent = append(nonAdjacent, Match{i1, j1, k1})
   351  	}
   352  
   353  	nonAdjacent = append(nonAdjacent, Match{len(m.a), len(m.b), 0})
   354  	m.matchingBlocks = nonAdjacent
   355  	return m.matchingBlocks
   356  }
   357  
   358  // Return list of 5-tuples describing how to turn a into b.
   359  //
   360  // Each tuple is of the form (tag, i1, i2, j1, j2).  The first tuple
   361  // has i1 == j1 == 0, and remaining tuples have i1 == the i2 from the
   362  // tuple preceding it, and likewise for j1 == the previous j2.
   363  //
   364  // The tags are characters, with these meanings:
   365  //
   366  // 'r' (replace):  a[i1:i2] should be replaced by b[j1:j2]
   367  //
   368  // 'd' (delete):   a[i1:i2] should be deleted, j1==j2 in this case.
   369  //
   370  // 'i' (insert):   b[j1:j2] should be inserted at a[i1:i1], i1==i2 in this case.
   371  //
   372  // 'e' (equal):    a[i1:i2] == b[j1:j2]
   373  func (m *SequenceMatcher) GetOpCodes() []OpCode {
   374  	if m.opCodes != nil {
   375  		return m.opCodes
   376  	}
   377  	i, j := 0, 0
   378  	matching := m.GetMatchingBlocks()
   379  	opCodes := make([]OpCode, 0, len(matching))
   380  	for _, m := range matching {
   381  		//  invariant:  we've pumped out correct diffs to change
   382  		//  a[:i] into b[:j], and the next matching block is
   383  		//  a[ai:ai+size] == b[bj:bj+size]. So we need to pump
   384  		//  out a diff to change a[i:ai] into b[j:bj], pump out
   385  		//  the matching block, and move (i,j) beyond the match
   386  		ai, bj, size := m.A, m.B, m.Size
   387  		tag := byte(0)
   388  		if i < ai && j < bj {
   389  			tag = 'r'
   390  		} else if i < ai {
   391  			tag = 'd'
   392  		} else if j < bj {
   393  			tag = 'i'
   394  		}
   395  		if tag > 0 {
   396  			opCodes = append(opCodes, OpCode{tag, i, ai, j, bj})
   397  		}
   398  		i, j = ai+size, bj+size
   399  		// the list of matching blocks is terminated by a
   400  		// sentinel with size 0
   401  		if size > 0 {
   402  			opCodes = append(opCodes, OpCode{'e', ai, i, bj, j})
   403  		}
   404  	}
   405  	m.opCodes = opCodes
   406  	return m.opCodes
   407  }
   408  
   409  // Isolate change clusters by eliminating ranges with no changes.
   410  //
   411  // Return a generator of groups with up to n lines of context.
   412  // Each group is in the same format as returned by GetOpCodes().
   413  func (m *SequenceMatcher) GetGroupedOpCodes(n int) [][]OpCode {
   414  	if n < 0 {
   415  		n = 3
   416  	}
   417  	codes := m.GetOpCodes()
   418  	if len(codes) == 0 {
   419  		codes = []OpCode{OpCode{'e', 0, 1, 0, 1}}
   420  	}
   421  	// Fixup leading and trailing groups if they show no changes.
   422  	if codes[0].Tag == 'e' {
   423  		c := codes[0]
   424  		i1, i2, j1, j2 := c.I1, c.I2, c.J1, c.J2
   425  		codes[0] = OpCode{c.Tag, max(i1, i2-n), i2, max(j1, j2-n), j2}
   426  	}
   427  	if codes[len(codes)-1].Tag == 'e' {
   428  		c := codes[len(codes)-1]
   429  		i1, i2, j1, j2 := c.I1, c.I2, c.J1, c.J2
   430  		codes[len(codes)-1] = OpCode{c.Tag, i1, min(i2, i1+n), j1, min(j2, j1+n)}
   431  	}
   432  	nn := n + n
   433  	groups := [][]OpCode{}
   434  	group := []OpCode{}
   435  	for _, c := range codes {
   436  		i1, i2, j1, j2 := c.I1, c.I2, c.J1, c.J2
   437  		// End the current group and start a new one whenever
   438  		// there is a large range with no changes.
   439  		if c.Tag == 'e' && i2-i1 > nn {
   440  			group = append(group, OpCode{c.Tag, i1, min(i2, i1+n),
   441  				j1, min(j2, j1+n)})
   442  			groups = append(groups, group)
   443  			group = []OpCode{}
   444  			i1, j1 = max(i1, i2-n), max(j1, j2-n)
   445  		}
   446  		group = append(group, OpCode{c.Tag, i1, i2, j1, j2})
   447  	}
   448  	if len(group) > 0 && !(len(group) == 1 && group[0].Tag == 'e') {
   449  		groups = append(groups, group)
   450  	}
   451  	return groups
   452  }
   453  
   454  // Return a measure of the sequences' similarity (float in [0,1]).
   455  //
   456  // Where T is the total number of elements in both sequences, and
   457  // M is the number of matches, this is 2.0*M / T.
   458  // Note that this is 1 if the sequences are identical, and 0 if
   459  // they have nothing in common.
   460  //
   461  // .Ratio() is expensive to compute if you haven't already computed
   462  // .GetMatchingBlocks() or .GetOpCodes(), in which case you may
   463  // want to try .QuickRatio() or .RealQuickRation() first to get an
   464  // upper bound.
   465  func (m *SequenceMatcher) Ratio() float64 {
   466  	matches := 0
   467  	for _, m := range m.GetMatchingBlocks() {
   468  		matches += m.Size
   469  	}
   470  	return calculateRatio(matches, len(m.a)+len(m.b))
   471  }
   472  
   473  // Return an upper bound on ratio() relatively quickly.
   474  //
   475  // This isn't defined beyond that it is an upper bound on .Ratio(), and
   476  // is faster to compute.
   477  func (m *SequenceMatcher) QuickRatio() float64 {
   478  	// viewing a and b as multisets, set matches to the cardinality
   479  	// of their intersection; this counts the number of matches
   480  	// without regard to order, so is clearly an upper bound
   481  	if m.fullBCount == nil {
   482  		m.fullBCount = map[string]int{}
   483  		for _, s := range m.b {
   484  			m.fullBCount[s] = m.fullBCount[s] + 1
   485  		}
   486  	}
   487  
   488  	// avail[x] is the number of times x appears in 'b' less the
   489  	// number of times we've seen it in 'a' so far ... kinda
   490  	avail := map[string]int{}
   491  	matches := 0
   492  	for _, s := range m.a {
   493  		n, ok := avail[s]
   494  		if !ok {
   495  			n = m.fullBCount[s]
   496  		}
   497  		avail[s] = n - 1
   498  		if n > 0 {
   499  			matches += 1
   500  		}
   501  	}
   502  	return calculateRatio(matches, len(m.a)+len(m.b))
   503  }
   504  
   505  // Return an upper bound on ratio() very quickly.
   506  //
   507  // This isn't defined beyond that it is an upper bound on .Ratio(), and
   508  // is faster to compute than either .Ratio() or .QuickRatio().
   509  func (m *SequenceMatcher) RealQuickRatio() float64 {
   510  	la, lb := len(m.a), len(m.b)
   511  	return calculateRatio(min(la, lb), la+lb)
   512  }
   513  
   514  // Convert range to the "ed" format
   515  func formatRangeUnified(start, stop int) string {
   516  	// Per the diff spec at http://www.unix.org/single_unix_specification/
   517  	beginning := start + 1 // lines start numbering with one
   518  	length := stop - start
   519  	if length == 1 {
   520  		return fmt.Sprintf("%d", beginning)
   521  	}
   522  	if length == 0 {
   523  		beginning -= 1 // empty ranges begin at line just before the range
   524  	}
   525  	return fmt.Sprintf("%d,%d", beginning, length)
   526  }
   527  
   528  // Unified diff parameters
   529  type UnifiedDiff struct {
   530  	A        []string // First sequence lines
   531  	FromFile string   // First file name
   532  	FromDate string   // First file time
   533  	B        []string // Second sequence lines
   534  	ToFile   string   // Second file name
   535  	ToDate   string   // Second file time
   536  	Eol      string   // Headers end of line, defaults to LF
   537  	Context  int      // Number of context lines
   538  }
   539  
   540  // Compare two sequences of lines; generate the delta as a unified diff.
   541  //
   542  // Unified diffs are a compact way of showing line changes and a few
   543  // lines of context.  The number of context lines is set by 'n' which
   544  // defaults to three.
   545  //
   546  // By default, the diff control lines (those with ---, +++, or @@) are
   547  // created with a trailing newline.  This is helpful so that inputs
   548  // created from file.readlines() result in diffs that are suitable for
   549  // file.writelines() since both the inputs and outputs have trailing
   550  // newlines.
   551  //
   552  // For inputs that do not have trailing newlines, set the lineterm
   553  // argument to "" so that the output will be uniformly newline free.
   554  //
   555  // The unidiff format normally has a header for filenames and modification
   556  // times.  Any or all of these may be specified using strings for
   557  // 'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'.
   558  // The modification times are normally expressed in the ISO 8601 format.
   559  func WriteUnifiedDiff(writer io.Writer, diff UnifiedDiff) error {
   560  	buf := bufio.NewWriter(writer)
   561  	defer buf.Flush()
   562  	w := func(format string, args ...interface{}) error {
   563  		_, err := buf.WriteString(fmt.Sprintf(format, args...))
   564  		return err
   565  	}
   566  
   567  	if len(diff.Eol) == 0 {
   568  		diff.Eol = "\n"
   569  	}
   570  
   571  	started := false
   572  	m := NewMatcher(diff.A, diff.B)
   573  	for _, g := range m.GetGroupedOpCodes(diff.Context) {
   574  		if !started {
   575  			started = true
   576  			fromDate := ""
   577  			if len(diff.FromDate) > 0 {
   578  				fromDate = "\t" + diff.FromDate
   579  			}
   580  			toDate := ""
   581  			if len(diff.ToDate) > 0 {
   582  				toDate = "\t" + diff.ToDate
   583  			}
   584  			err := w("--- %s%s%s", diff.FromFile, fromDate, diff.Eol)
   585  			if err != nil {
   586  				return err
   587  			}
   588  			err = w("+++ %s%s%s", diff.ToFile, toDate, diff.Eol)
   589  			if err != nil {
   590  				return err
   591  			}
   592  		}
   593  		first, last := g[0], g[len(g)-1]
   594  		range1 := formatRangeUnified(first.I1, last.I2)
   595  		range2 := formatRangeUnified(first.J1, last.J2)
   596  		if err := w("@@ -%s +%s @@%s", range1, range2, diff.Eol); err != nil {
   597  			return err
   598  		}
   599  		for _, c := range g {
   600  			i1, i2, j1, j2 := c.I1, c.I2, c.J1, c.J2
   601  			if c.Tag == 'e' {
   602  				for _, line := range diff.A[i1:i2] {
   603  					if err := w(" " + line); err != nil {
   604  						return err
   605  					}
   606  				}
   607  				continue
   608  			}
   609  			if c.Tag == 'r' || c.Tag == 'd' {
   610  				for _, line := range diff.A[i1:i2] {
   611  					if err := w("-" + line); err != nil {
   612  						return err
   613  					}
   614  				}
   615  			}
   616  			if c.Tag == 'r' || c.Tag == 'i' {
   617  				for _, line := range diff.B[j1:j2] {
   618  					if err := w("+" + line); err != nil {
   619  						return err
   620  					}
   621  				}
   622  			}
   623  		}
   624  	}
   625  	return nil
   626  }
   627  
   628  // Like WriteUnifiedDiff but returns the diff a string.
   629  func GetUnifiedDiffString(diff UnifiedDiff) (string, error) {
   630  	w := &bytes.Buffer{}
   631  	err := WriteUnifiedDiff(w, diff)
   632  	return string(w.Bytes()), err
   633  }
   634  
   635  // Convert range to the "ed" format.
   636  func formatRangeContext(start, stop int) string {
   637  	// Per the diff spec at http://www.unix.org/single_unix_specification/
   638  	beginning := start + 1 // lines start numbering with one
   639  	length := stop - start
   640  	if length == 0 {
   641  		beginning -= 1 // empty ranges begin at line just before the range
   642  	}
   643  	if length <= 1 {
   644  		return fmt.Sprintf("%d", beginning)
   645  	}
   646  	return fmt.Sprintf("%d,%d", beginning, beginning+length-1)
   647  }
   648  
   649  type ContextDiff UnifiedDiff
   650  
   651  // Compare two sequences of lines; generate the delta as a context diff.
   652  //
   653  // Context diffs are a compact way of showing line changes and a few
   654  // lines of context. The number of context lines is set by diff.Context
   655  // which defaults to three.
   656  //
   657  // By default, the diff control lines (those with *** or ---) are
   658  // created with a trailing newline.
   659  //
   660  // For inputs that do not have trailing newlines, set the diff.Eol
   661  // argument to "" so that the output will be uniformly newline free.
   662  //
   663  // The context diff format normally has a header for filenames and
   664  // modification times.  Any or all of these may be specified using
   665  // strings for diff.FromFile, diff.ToFile, diff.FromDate, diff.ToDate.
   666  // The modification times are normally expressed in the ISO 8601 format.
   667  // If not specified, the strings default to blanks.
   668  func WriteContextDiff(writer io.Writer, diff ContextDiff) error {
   669  	buf := bufio.NewWriter(writer)
   670  	defer buf.Flush()
   671  	var diffErr error
   672  	w := func(format string, args ...interface{}) {
   673  		_, err := buf.WriteString(fmt.Sprintf(format, args...))
   674  		if diffErr == nil && err != nil {
   675  			diffErr = err
   676  		}
   677  	}
   678  
   679  	if len(diff.Eol) == 0 {
   680  		diff.Eol = "\n"
   681  	}
   682  
   683  	prefix := map[byte]string{
   684  		'i': "+ ",
   685  		'd': "- ",
   686  		'r': "! ",
   687  		'e': "  ",
   688  	}
   689  
   690  	started := false
   691  	m := NewMatcher(diff.A, diff.B)
   692  	for _, g := range m.GetGroupedOpCodes(diff.Context) {
   693  		if !started {
   694  			started = true
   695  			fromDate := ""
   696  			if len(diff.FromDate) > 0 {
   697  				fromDate = "\t" + diff.FromDate
   698  			}
   699  			toDate := ""
   700  			if len(diff.ToDate) > 0 {
   701  				toDate = "\t" + diff.ToDate
   702  			}
   703  			w("*** %s%s%s", diff.FromFile, fromDate, diff.Eol)
   704  			w("--- %s%s%s", diff.ToFile, toDate, diff.Eol)
   705  		}
   706  
   707  		first, last := g[0], g[len(g)-1]
   708  		w("***************" + diff.Eol)
   709  
   710  		range1 := formatRangeContext(first.I1, last.I2)
   711  		w("*** %s ****%s", range1, diff.Eol)
   712  		for _, c := range g {
   713  			if c.Tag == 'r' || c.Tag == 'd' {
   714  				for _, cc := range g {
   715  					if cc.Tag == 'i' {
   716  						continue
   717  					}
   718  					for _, line := range diff.A[cc.I1:cc.I2] {
   719  						w(prefix[cc.Tag] + line)
   720  					}
   721  				}
   722  				break
   723  			}
   724  		}
   725  
   726  		range2 := formatRangeContext(first.J1, last.J2)
   727  		w("--- %s ----%s", range2, diff.Eol)
   728  		for _, c := range g {
   729  			if c.Tag == 'r' || c.Tag == 'i' {
   730  				for _, cc := range g {
   731  					if cc.Tag == 'd' {
   732  						continue
   733  					}
   734  					for _, line := range diff.B[cc.J1:cc.J2] {
   735  						w(prefix[cc.Tag] + line)
   736  					}
   737  				}
   738  				break
   739  			}
   740  		}
   741  	}
   742  	return diffErr
   743  }
   744  
   745  // Like WriteContextDiff but returns the diff a string.
   746  func GetContextDiffString(diff ContextDiff) (string, error) {
   747  	w := &bytes.Buffer{}
   748  	err := WriteContextDiff(w, diff)
   749  	return string(w.Bytes()), err
   750  }
   751  
   752  // Split a string on "\n" while preserving them. The output can be used
   753  // as input for UnifiedDiff and ContextDiff structures.
   754  func SplitLines(s string) []string {
   755  	lines := strings.SplitAfter(s, "\n")
   756  	lines[len(lines)-1] += "\n"
   757  	return lines
   758  }