github.com/haya14busa/reviewdog@v0.0.0-20180723114510-ffb00ef78fd3/diff/parse.go (about) 1 package diff 2 3 import ( 4 "bufio" 5 "bytes" 6 "errors" 7 "fmt" 8 "io" 9 "strconv" 10 "strings" 11 ) 12 13 const ( 14 tokenDiffGit = "diff --git" // diff --git a/sample.old.txt b/sample.new.txt 15 tokenOldFile = "---" // --- sample.old.txt 2016-10-13 05:09:35.820791185 +0900 16 tokenNewFile = "+++" // +++ sample.new.txt 2016-10-13 05:15:26.839245048 +0900 17 tokenStartHunk = "@@" // @@ -1,3 +1,4 @@ 18 tokenUnchangedLine = " " // unchanged, contextual line 19 tokenAddedLine = "+" // +added line 20 tokenDeletedLine = "-" // -deleted line 21 tokenNoNewlineAtEOF = `\` // \ No newline at end of file 22 ) 23 24 var ( 25 // ErrNoNewFile represents error which there are no expected new file line. 26 ErrNoNewFile = errors.New("no expected new file line") // +++ newfile 27 // ErrNoHunks represents error which there are no expected hunks. 28 ErrNoHunks = errors.New("no expected hunks") // @@ -1,3 +1,4 @@ 29 ) 30 31 // ErrInvalidHunkRange represents invalid line of hunk range. @@ -1,3 +1,4 @@ 32 type ErrInvalidHunkRange struct { 33 invalid string 34 } 35 36 func (e *ErrInvalidHunkRange) Error() string { 37 return fmt.Sprintf("invalid hunk range: %v", e.invalid) 38 } 39 40 // ParseMultiFile parses a multi-file unified diff. 41 func ParseMultiFile(r io.Reader) ([]*FileDiff, error) { 42 return (&multiFileParser{r: bufio.NewReader(r)}).Parse() 43 } 44 45 type multiFileParser struct { 46 r *bufio.Reader 47 } 48 49 func (p *multiFileParser) Parse() ([]*FileDiff, error) { 50 var fds []*FileDiff 51 fp := &fileParser{r: p.r} 52 for { 53 fd, err := fp.Parse() 54 if err != nil || fd == nil { 55 break 56 } 57 fds = append(fds, fd) 58 } 59 return fds, nil 60 } 61 62 // ParseFile parses a file unified diff. 63 func ParseFile(r io.Reader) (*FileDiff, error) { 64 return (&fileParser{r: bufio.NewReader(r)}).Parse() 65 } 66 67 type fileParser struct { 68 r *bufio.Reader 69 } 70 71 func (p *fileParser) Parse() (*FileDiff, error) { 72 fd := &FileDiff{} 73 fd.Extended = parseExtendedHeader(p.r) 74 b, err := p.r.Peek(len(tokenOldFile)) 75 if err != nil { 76 if err == io.EOF && len(fd.Extended) > 0 { 77 return fd, nil 78 } 79 return nil, nil 80 } 81 if bytes.HasPrefix(b, []byte(tokenOldFile)) { 82 // parse `--- sample.old.txt 2016-10-13 05:09:35.820791185 +0900` 83 oldline, _ := readline(p.r) // ignore err because we know it can read something 84 fd.PathOld, fd.TimeOld = parseFileHeader(oldline) 85 // parse `+++ sample.new.txt 2016-10-13 05:09:35.820791185 +0900` 86 if b, err := p.r.Peek(len(tokenNewFile)); err != nil || !bytes.HasPrefix(b, []byte(tokenNewFile)) { 87 return nil, ErrNoNewFile 88 } 89 newline, _ := readline(p.r) // ignore err because we know it can read something 90 fd.PathNew, fd.TimeNew = parseFileHeader(newline) 91 } 92 // parse hunks 93 fd.Hunks, err = p.parseHunks() 94 if err != nil { 95 return nil, err 96 } 97 return fd, nil 98 } 99 100 func (p *fileParser) parseHunks() ([]*Hunk, error) { 101 b, err := p.r.Peek(len(tokenOldFile)) 102 if err != nil { 103 return nil, ErrNoHunks 104 } 105 if !bytes.HasPrefix(b, []byte(tokenStartHunk)) { 106 b, err := p.r.Peek(len(tokenDiffGit)) 107 if err != nil { 108 return nil, ErrNoHunks 109 } 110 if bytes.HasPrefix(b, []byte(tokenDiffGit)) { 111 // git diff may contain a file diff with empty hunks. 112 // e.g. delete an empty file. 113 return []*Hunk{}, nil 114 } 115 return nil, ErrNoHunks 116 } 117 var hunks []*Hunk 118 hp := &hunkParser{r: p.r} 119 for { 120 h, err := hp.Parse() 121 if err != nil { 122 return nil, err 123 } 124 if h == nil { 125 break 126 } 127 hunks = append(hunks, h) 128 } 129 return hunks, nil 130 } 131 132 // parseFileHeader parses file header line and returns filename and timestamp. 133 // timestamp may be empty. 134 func parseFileHeader(line string) (filename, timestamp string) { 135 // strip `+++ ` or `--- ` 136 ss := line[len(tokenOldFile)+1:] 137 tabi := strings.LastIndex(ss, "\t") 138 if tabi == -1 { 139 return ss, "" 140 } 141 return ss[:tabi], ss[tabi+1:] 142 } 143 144 func parseExtendedHeader(r *bufio.Reader) []string { 145 var es []string 146 b, err := r.Peek(len(tokenDiffGit)) 147 if err != nil { 148 return nil 149 } 150 // if starts with 'diff --git', parse extended header 151 if bytes.HasPrefix(b, []byte(tokenDiffGit)) { 152 diffgitline, _ := readline(r) // ignore err because we know it can read something 153 es = append(es, diffgitline) 154 for { 155 b, err := r.Peek(len(tokenDiffGit)) 156 if err != nil || bytes.HasPrefix(b, []byte(tokenOldFile)) || bytes.HasPrefix(b, []byte(tokenDiffGit)) { 157 break 158 } 159 line, _ := readline(r) 160 es = append(es, string(line)) 161 } 162 } 163 return es 164 } 165 166 type hunkParser struct { 167 r *bufio.Reader 168 lnumdiff int 169 } 170 171 func (p *hunkParser) Parse() (*Hunk, error) { 172 if b, err := p.r.Peek(len(tokenStartHunk)); err != nil || !bytes.HasPrefix(b, []byte(tokenStartHunk)) { 173 return nil, nil 174 } 175 rangeline, _ := readline(p.r) 176 hr, err := parseHunkRange(rangeline) 177 if err != nil { 178 return nil, err 179 } 180 hunk := &Hunk{ 181 StartLineOld: hr.lold, 182 LineLengthOld: hr.sold, 183 StartLineNew: hr.lnew, 184 LineLengthNew: hr.snew, 185 Section: hr.section, 186 } 187 lold := hr.lold 188 lnew := hr.lnew 189 endhunk: 190 for !p.done(lold, lnew, hr) { 191 b, err := p.r.Peek(1) 192 if err != nil { 193 break 194 } 195 token := string(b) 196 switch token { 197 case tokenUnchangedLine, tokenAddedLine, tokenDeletedLine: 198 p.lnumdiff++ 199 l, _ := readline(p.r) 200 line := &Line{Content: l[len(token):]} // trim first token 201 switch token { 202 case tokenUnchangedLine: 203 line.Type = LineUnchanged 204 line.LnumDiff = p.lnumdiff 205 line.LnumOld = lold 206 line.LnumNew = lnew 207 lold++ 208 lnew++ 209 case tokenAddedLine: 210 line.Type = LineAdded 211 line.LnumDiff = p.lnumdiff 212 line.LnumNew = lnew 213 lnew++ 214 case tokenDeletedLine: 215 line.Type = LineDeleted 216 line.LnumDiff = p.lnumdiff 217 line.LnumOld = lold 218 lold++ 219 } 220 hunk.Lines = append(hunk.Lines, line) 221 case tokenNoNewlineAtEOF: 222 // skip \ No newline at end of file. just consume line 223 readline(p.r) 224 default: 225 break endhunk 226 } 227 } 228 p.lnumdiff++ // count up by an additional hunk 229 return hunk, nil 230 } 231 232 func (p *hunkParser) done(lold, lnew int, hr *hunkrange) bool { 233 end := (lold >= hr.lold+hr.sold && lnew >= hr.lnew+hr.snew) 234 if b, err := p.r.Peek(1); err != nil || (string(b) != tokenNoNewlineAtEOF && end) { 235 return true 236 } 237 return false 238 } 239 240 // @@ -l,s +l,s @@ optional section heading 241 type hunkrange struct { 242 lold, sold, lnew, snew int 243 section string 244 } 245 246 // @@ -lold[,sold] +lnew[,snew] @@[ section] 247 // 0 1 2 3 4 248 func parseHunkRange(rangeline string) (*hunkrange, error) { 249 ps := strings.SplitN(rangeline, " ", 5) 250 invalidErr := &ErrInvalidHunkRange{invalid: rangeline} 251 hunkrange := &hunkrange{} 252 if len(ps) < 4 || ps[0] != "@@" || ps[3] != "@@" { 253 return nil, invalidErr 254 } 255 old := ps[1] // -lold[,sold] 256 if !strings.HasPrefix(old, "-") { 257 return nil, invalidErr 258 } 259 lold, sold, err := parseLS(old[1:]) 260 if err != nil { 261 return nil, invalidErr 262 } 263 hunkrange.lold = lold 264 hunkrange.sold = sold 265 new := ps[2] // +lnew[,snew] 266 if !strings.HasPrefix(new, "+") { 267 return nil, invalidErr 268 } 269 lnew, snew, err := parseLS(new[1:]) 270 if err != nil { 271 return nil, invalidErr 272 } 273 hunkrange.lnew = lnew 274 hunkrange.snew = snew 275 if len(ps) == 5 { 276 hunkrange.section = ps[4] 277 } 278 return hunkrange, nil 279 } 280 281 // l[,s] 282 func parseLS(ls string) (l, s int, err error) { 283 ss := strings.SplitN(ls, ",", 2) 284 l, err = strconv.Atoi(ss[0]) 285 if err != nil { 286 return 0, 0, err 287 } 288 if len(ss) == 2 { 289 s, err = strconv.Atoi(ss[1]) 290 if err != nil { 291 return 0, 0, err 292 } 293 } else { 294 s = 1 295 } 296 return l, s, nil 297 } 298 299 // readline reads lines from bufio.Reader with size limit. It consumes 300 // remaining content even if the line size reaches size limit. 301 func readline(r *bufio.Reader) (string, error) { 302 line, isPrefix, err := r.ReadLine() 303 if err != nil { 304 return "", err 305 } 306 // consume all remaining line content 307 for isPrefix { 308 _, isPrefix, _ = r.ReadLine() 309 } 310 return string(line), nil 311 }