github.com/haya14busa/reviewdog@v0.0.0-20180723114510-ffb00ef78fd3/diff/parse.go (about)

     1  package diff
     2  
     3  import (
     4  	"bufio"
     5  	"bytes"
     6  	"errors"
     7  	"fmt"
     8  	"io"
     9  	"strconv"
    10  	"strings"
    11  )
    12  
// Line prefixes ("tokens") used to classify the lines of a unified diff.
// The trailing comment on each token shows an example of a matching line.
const (
	tokenDiffGit        = "diff --git" // diff --git a/sample.old.txt b/sample.new.txt
	tokenOldFile        = "---"        // --- sample.old.txt	2016-10-13 05:09:35.820791185 +0900
	tokenNewFile        = "+++"        // +++ sample.new.txt	2016-10-13 05:15:26.839245048 +0900
	tokenStartHunk      = "@@"         // @@ -1,3 +1,4 @@
	tokenUnchangedLine  = " "          //  unchanged, contextual line
	tokenAddedLine      = "+"          // +added line
	tokenDeletedLine    = "-"          // -deleted line
	tokenNoNewlineAtEOF = `\`          // \ No newline at end of file
)
    23  
var (
	// ErrNoNewFile is returned when an old file line (`--- oldfile`) is not
	// followed by the expected new file line (`+++ newfile`).
	ErrNoNewFile = errors.New("no expected new file line") // +++ newfile
	// ErrNoHunks is returned when no hunk is found where one is expected.
	ErrNoHunks = errors.New("no expected hunks") // @@ -1,3 +1,4 @@
)
    30  
    31  // ErrInvalidHunkRange represents invalid line of hunk range. @@ -1,3 +1,4 @@
    32  type ErrInvalidHunkRange struct {
    33  	invalid string
    34  }
    35  
    36  func (e *ErrInvalidHunkRange) Error() string {
    37  	return fmt.Sprintf("invalid hunk range: %v", e.invalid)
    38  }
    39  
    40  // ParseMultiFile parses a multi-file unified diff.
    41  func ParseMultiFile(r io.Reader) ([]*FileDiff, error) {
    42  	return (&multiFileParser{r: bufio.NewReader(r)}).Parse()
    43  }
    44  
    45  type multiFileParser struct {
    46  	r *bufio.Reader
    47  }
    48  
    49  func (p *multiFileParser) Parse() ([]*FileDiff, error) {
    50  	var fds []*FileDiff
    51  	fp := &fileParser{r: p.r}
    52  	for {
    53  		fd, err := fp.Parse()
    54  		if err != nil || fd == nil {
    55  			break
    56  		}
    57  		fds = append(fds, fd)
    58  	}
    59  	return fds, nil
    60  }
    61  
    62  // ParseFile parses a file unified diff.
    63  func ParseFile(r io.Reader) (*FileDiff, error) {
    64  	return (&fileParser{r: bufio.NewReader(r)}).Parse()
    65  }
    66  
    67  type fileParser struct {
    68  	r *bufio.Reader
    69  }
    70  
    71  func (p *fileParser) Parse() (*FileDiff, error) {
    72  	fd := &FileDiff{}
    73  	fd.Extended = parseExtendedHeader(p.r)
    74  	b, err := p.r.Peek(len(tokenOldFile))
    75  	if err != nil {
    76  		if err == io.EOF && len(fd.Extended) > 0 {
    77  			return fd, nil
    78  		}
    79  		return nil, nil
    80  	}
    81  	if bytes.HasPrefix(b, []byte(tokenOldFile)) {
    82  		// parse `--- sample.old.txt	2016-10-13 05:09:35.820791185 +0900`
    83  		oldline, _ := readline(p.r) // ignore err because we know it can read something
    84  		fd.PathOld, fd.TimeOld = parseFileHeader(oldline)
    85  		// parse `+++ sample.new.txt	2016-10-13 05:09:35.820791185 +0900`
    86  		if b, err := p.r.Peek(len(tokenNewFile)); err != nil || !bytes.HasPrefix(b, []byte(tokenNewFile)) {
    87  			return nil, ErrNoNewFile
    88  		}
    89  		newline, _ := readline(p.r) // ignore err because we know it can read something
    90  		fd.PathNew, fd.TimeNew = parseFileHeader(newline)
    91  	}
    92  	// parse hunks
    93  	fd.Hunks, err = p.parseHunks()
    94  	if err != nil {
    95  		return nil, err
    96  	}
    97  	return fd, nil
    98  }
    99  
   100  func (p *fileParser) parseHunks() ([]*Hunk, error) {
   101  	b, err := p.r.Peek(len(tokenOldFile))
   102  	if err != nil {
   103  		return nil, ErrNoHunks
   104  	}
   105  	if !bytes.HasPrefix(b, []byte(tokenStartHunk)) {
   106  		b, err := p.r.Peek(len(tokenDiffGit))
   107  		if err != nil {
   108  			return nil, ErrNoHunks
   109  		}
   110  		if bytes.HasPrefix(b, []byte(tokenDiffGit)) {
   111  			// git diff may contain a file diff with empty hunks.
   112  			// e.g. delete an empty file.
   113  			return []*Hunk{}, nil
   114  		}
   115  		return nil, ErrNoHunks
   116  	}
   117  	var hunks []*Hunk
   118  	hp := &hunkParser{r: p.r}
   119  	for {
   120  		h, err := hp.Parse()
   121  		if err != nil {
   122  			return nil, err
   123  		}
   124  		if h == nil {
   125  			break
   126  		}
   127  		hunks = append(hunks, h)
   128  	}
   129  	return hunks, nil
   130  }
   131  
   132  // parseFileHeader parses file header line and returns filename and timestamp.
   133  // timestamp may be empty.
   134  func parseFileHeader(line string) (filename, timestamp string) {
   135  	// strip `+++ ` or `--- `
   136  	ss := line[len(tokenOldFile)+1:]
   137  	tabi := strings.LastIndex(ss, "\t")
   138  	if tabi == -1 {
   139  		return ss, ""
   140  	}
   141  	return ss[:tabi], ss[tabi+1:]
   142  }
   143  
   144  func parseExtendedHeader(r *bufio.Reader) []string {
   145  	var es []string
   146  	b, err := r.Peek(len(tokenDiffGit))
   147  	if err != nil {
   148  		return nil
   149  	}
   150  	// if starts with 'diff --git', parse extended header
   151  	if bytes.HasPrefix(b, []byte(tokenDiffGit)) {
   152  		diffgitline, _ := readline(r) // ignore err because we know it can read something
   153  		es = append(es, diffgitline)
   154  		for {
   155  			b, err := r.Peek(len(tokenDiffGit))
   156  			if err != nil || bytes.HasPrefix(b, []byte(tokenOldFile)) || bytes.HasPrefix(b, []byte(tokenDiffGit)) {
   157  				break
   158  			}
   159  			line, _ := readline(r)
   160  			es = append(es, string(line))
   161  		}
   162  	}
   163  	return es
   164  }
   165  
// hunkParser parses hunks one at a time from r.
//
// lnumdiff is a running counter kept across calls to Parse: it is incremented
// for every unchanged, added or deleted line (and stored in that line's
// LnumDiff), and once more at the end of each parsed hunk.
type hunkParser struct {
	r        *bufio.Reader
	lnumdiff int
}
   170  
   171  func (p *hunkParser) Parse() (*Hunk, error) {
   172  	if b, err := p.r.Peek(len(tokenStartHunk)); err != nil || !bytes.HasPrefix(b, []byte(tokenStartHunk)) {
   173  		return nil, nil
   174  	}
   175  	rangeline, _ := readline(p.r)
   176  	hr, err := parseHunkRange(rangeline)
   177  	if err != nil {
   178  		return nil, err
   179  	}
   180  	hunk := &Hunk{
   181  		StartLineOld:  hr.lold,
   182  		LineLengthOld: hr.sold,
   183  		StartLineNew:  hr.lnew,
   184  		LineLengthNew: hr.snew,
   185  		Section:       hr.section,
   186  	}
   187  	lold := hr.lold
   188  	lnew := hr.lnew
   189  endhunk:
   190  	for !p.done(lold, lnew, hr) {
   191  		b, err := p.r.Peek(1)
   192  		if err != nil {
   193  			break
   194  		}
   195  		token := string(b)
   196  		switch token {
   197  		case tokenUnchangedLine, tokenAddedLine, tokenDeletedLine:
   198  			p.lnumdiff++
   199  			l, _ := readline(p.r)
   200  			line := &Line{Content: l[len(token):]} // trim first token
   201  			switch token {
   202  			case tokenUnchangedLine:
   203  				line.Type = LineUnchanged
   204  				line.LnumDiff = p.lnumdiff
   205  				line.LnumOld = lold
   206  				line.LnumNew = lnew
   207  				lold++
   208  				lnew++
   209  			case tokenAddedLine:
   210  				line.Type = LineAdded
   211  				line.LnumDiff = p.lnumdiff
   212  				line.LnumNew = lnew
   213  				lnew++
   214  			case tokenDeletedLine:
   215  				line.Type = LineDeleted
   216  				line.LnumDiff = p.lnumdiff
   217  				line.LnumOld = lold
   218  				lold++
   219  			}
   220  			hunk.Lines = append(hunk.Lines, line)
   221  		case tokenNoNewlineAtEOF:
   222  			// skip \ No newline at end of file. just consume line
   223  			readline(p.r)
   224  		default:
   225  			break endhunk
   226  		}
   227  	}
   228  	p.lnumdiff++ // count up by an additional hunk
   229  	return hunk, nil
   230  }
   231  
   232  func (p *hunkParser) done(lold, lnew int, hr *hunkrange) bool {
   233  	end := (lold >= hr.lold+hr.sold && lnew >= hr.lnew+hr.snew)
   234  	if b, err := p.r.Peek(1); err != nil || (string(b) != tokenNoNewlineAtEOF && end) {
   235  		return true
   236  	}
   237  	return false
   238  }
   239  
// hunkrange holds the values parsed from a hunk range line:
//
//	@@ -l,s +l,s @@ optional section heading
//
// lold and sold are the `l` and `s` of the `-` (old) part, lnew and snew those
// of the `+` (new) part, and section is the optional text after the closing
// `@@`.
type hunkrange struct {
	lold, sold, lnew, snew int
	section                string
}
   245  
   246  // @@ -lold[,sold] +lnew[,snew] @@[ section]
   247  // 0  1              2            3   4
   248  func parseHunkRange(rangeline string) (*hunkrange, error) {
   249  	ps := strings.SplitN(rangeline, " ", 5)
   250  	invalidErr := &ErrInvalidHunkRange{invalid: rangeline}
   251  	hunkrange := &hunkrange{}
   252  	if len(ps) < 4 || ps[0] != "@@" || ps[3] != "@@" {
   253  		return nil, invalidErr
   254  	}
   255  	old := ps[1] // -lold[,sold]
   256  	if !strings.HasPrefix(old, "-") {
   257  		return nil, invalidErr
   258  	}
   259  	lold, sold, err := parseLS(old[1:])
   260  	if err != nil {
   261  		return nil, invalidErr
   262  	}
   263  	hunkrange.lold = lold
   264  	hunkrange.sold = sold
   265  	new := ps[2] // +lnew[,snew]
   266  	if !strings.HasPrefix(new, "+") {
   267  		return nil, invalidErr
   268  	}
   269  	lnew, snew, err := parseLS(new[1:])
   270  	if err != nil {
   271  		return nil, invalidErr
   272  	}
   273  	hunkrange.lnew = lnew
   274  	hunkrange.snew = snew
   275  	if len(ps) == 5 {
   276  		hunkrange.section = ps[4]
   277  	}
   278  	return hunkrange, nil
   279  }
   280  
   281  // l[,s]
   282  func parseLS(ls string) (l, s int, err error) {
   283  	ss := strings.SplitN(ls, ",", 2)
   284  	l, err = strconv.Atoi(ss[0])
   285  	if err != nil {
   286  		return 0, 0, err
   287  	}
   288  	if len(ss) == 2 {
   289  		s, err = strconv.Atoi(ss[1])
   290  		if err != nil {
   291  			return 0, 0, err
   292  		}
   293  	} else {
   294  		s = 1
   295  	}
   296  	return l, s, nil
   297  }
   298  
   299  // readline reads lines from bufio.Reader with size limit. It consumes
   300  // remaining content even if the line size reaches size limit.
   301  func readline(r *bufio.Reader) (string, error) {
   302  	line, isPrefix, err := r.ReadLine()
   303  	if err != nil {
   304  		return "", err
   305  	}
   306  	// consume all remaining line content
   307  	for isPrefix {
   308  		_, isPrefix, _ = r.ReadLine()
   309  	}
   310  	return string(line), nil
   311  }