github.com/massongit/reviewdog@v0.0.0-20240331071725-4a16675475a8/diff/parse.go (about) 1 package diff 2 3 import ( 4 "bufio" 5 "bytes" 6 "errors" 7 "fmt" 8 "io" 9 "strconv" 10 "strings" 11 ) 12 13 const ( 14 tokenDiff = "diff" // diff --git a/sample.old.txt b/sample.new.txt 15 tokenOldFile = "---" // --- sample.old.txt 2016-10-13 05:09:35.820791185 +0900 16 tokenNewFile = "+++" // +++ sample.new.txt 2016-10-13 05:15:26.839245048 +0900 17 tokenStartHunk = "@@" // @@ -1,3 +1,4 @@ 18 tokenUnchangedLine = " " // unchanged, contextual line 19 tokenAddedLine = "+" // +added line 20 tokenDeletedLine = "-" // -deleted line 21 tokenNoNewlineAtEOF = `\` // \ No newline at end of file 22 ) 23 24 var ( 25 // ErrNoNewFile represents error which there are no expected new file line. 26 ErrNoNewFile = errors.New("no expected new file line") // +++ newfile 27 // ErrNoHunks represents error which there are no expected hunks. 28 ErrNoHunks = errors.New("no expected hunks") // @@ -1,3 +1,4 @@ 29 ) 30 31 // ErrInvalidHunkRange represents invalid line of hunk range. @@ -1,3 +1,4 @@ 32 type ErrInvalidHunkRange struct { 33 invalid string 34 } 35 36 func (e *ErrInvalidHunkRange) Error() string { 37 return fmt.Sprintf("invalid hunk range: %v", e.invalid) 38 } 39 40 // ParseMultiFile parses a multi-file unified diff. 41 func ParseMultiFile(r io.Reader) ([]*FileDiff, error) { 42 return (&multiFileParser{r: bufio.NewReader(r)}).Parse() 43 } 44 45 type multiFileParser struct { 46 r *bufio.Reader 47 } 48 49 func (p *multiFileParser) Parse() ([]*FileDiff, error) { 50 var fds []*FileDiff 51 fp := &fileParser{r: p.r} 52 for { 53 fd, err := fp.Parse() 54 if err != nil || fd == nil { 55 break 56 } 57 fds = append(fds, fd) 58 } 59 return fds, nil 60 } 61 62 // ParseFile parses a file unified diff. 63 func ParseFile(r io.Reader) (*FileDiff, error) { 64 return (&fileParser{r: bufio.NewReader(r)}).Parse() 65 } 66 67 type fileParser struct { 68 r *bufio.Reader 69 } 70 71 func (p *fileParser) Parse() (*FileDiff, error) { 72 fd := &FileDiff{} 73 fd.Extended = parseExtendedHeader(p.r) 74 b, err := p.r.Peek(len(tokenOldFile)) 75 if err != nil { 76 if err == io.EOF && len(fd.Extended) > 0 { 77 return fd, nil 78 } 79 return nil, nil 80 } 81 if bytes.HasPrefix(b, []byte(tokenOldFile)) { 82 // parse `--- sample.old.txt 2016-10-13 05:09:35.820791185 +0900` 83 oldline, _ := readline(p.r) // ignore err because we know it can read something 84 fd.PathOld, fd.TimeOld = parseFileHeader(oldline) 85 // parse `+++ sample.new.txt 2016-10-13 05:09:35.820791185 +0900` 86 if b, err := p.r.Peek(len(tokenNewFile)); err != nil || !bytes.HasPrefix(b, []byte(tokenNewFile)) { 87 return nil, ErrNoNewFile 88 } 89 newline, _ := readline(p.r) // ignore err because we know it can read something 90 fd.PathNew, fd.TimeNew = parseFileHeader(newline) 91 } 92 // parse hunks 93 fd.Hunks, err = p.parseHunks() 94 if err != nil { 95 return nil, err 96 } 97 return fd, nil 98 } 99 100 func (p *fileParser) parseHunks() ([]*Hunk, error) { 101 b, err := p.r.Peek(len(tokenOldFile)) 102 if err != nil { 103 return nil, ErrNoHunks 104 } 105 if !bytes.HasPrefix(b, []byte(tokenStartHunk)) { 106 b, err := p.r.Peek(len(tokenDiff)) 107 if err != nil { 108 return nil, ErrNoHunks 109 } 110 if bytes.HasPrefix(b, []byte(tokenDiff)) { 111 // git diff may contain a file diff with empty hunks. 112 // e.g. delete an empty file. 113 return []*Hunk{}, nil 114 } 115 return nil, ErrNoHunks 116 } 117 var hunks []*Hunk 118 hp := &hunkParser{r: p.r} 119 for { 120 h, err := hp.Parse() 121 if err != nil { 122 return nil, err 123 } 124 if h == nil { 125 break 126 } 127 hunks = append(hunks, h) 128 } 129 return hunks, nil 130 } 131 132 // parseFileHeader parses file header line and returns filename and timestamp. 133 // timestamp may be empty. 134 func parseFileHeader(line string) (filename, timestamp string) { 135 // strip `+++ ` or `--- ` 136 ss := line[len(tokenOldFile)+1:] 137 tabi := strings.LastIndex(ss, "\t") 138 if tabi == -1 { 139 return unquoteCStyle(ss), "" 140 } 141 return unquoteCStyle(ss[:tabi]), ss[tabi+1:] 142 } 143 144 // C-style name unquoting. 145 // it is from https://github.com/git/git/blob/77556354bb7ac50450e3b28999e3576969869068/quote.c#L345-L413 146 func unquoteCStyle(str string) string { 147 if !strings.HasPrefix(str, `"`) { 148 // no need to unquote 149 return str 150 } 151 str = strings.TrimPrefix(strings.TrimSuffix(str, `"`), `"`) 152 153 res := make([]byte, 0, len(str)) 154 r := strings.NewReader(str) 155 LOOP: 156 for { 157 ch, err := r.ReadByte() 158 if err != nil { 159 break 160 } 161 if ch != '\\' { 162 res = append(res, ch) 163 continue 164 } 165 166 ch, err = r.ReadByte() 167 if err != nil { 168 break 169 } 170 switch ch { 171 case 'a': 172 res = append(res, '\a') 173 case 'b': 174 res = append(res, '\b') 175 case 't': 176 res = append(res, '\t') 177 case 'n': 178 res = append(res, '\n') 179 case 'v': 180 res = append(res, '\v') 181 case 'f': 182 res = append(res, '\f') 183 case 'r': 184 res = append(res, '\r') 185 case '"': 186 res = append(res, '"') 187 case '\\': 188 res = append(res, '\\') 189 case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': 190 if err := r.UnreadByte(); err != nil { 191 break LOOP 192 } 193 var oct [3]byte 194 if n, _ := r.Read(oct[:]); n < 3 { 195 res = append(res, oct[:n]...) 196 break LOOP 197 } 198 ch, err := strconv.ParseUint(string(oct[:]), 8, 8) 199 if err != nil { 200 res = append(res, oct[:]...) 201 break 202 } 203 res = append(res, byte(ch)) 204 default: 205 res = append(res, ch) 206 } 207 } 208 209 return string(res) 210 } 211 212 func parseExtendedHeader(r *bufio.Reader) []string { 213 var es []string 214 b, err := r.Peek(len(tokenDiff)) 215 if err != nil { 216 return nil 217 } 218 // if starts with 'diff', parse extended header 219 if bytes.HasPrefix(b, []byte(tokenDiff)) { 220 diffgitline, _ := readline(r) // ignore err because we know it can read something 221 es = append(es, diffgitline) 222 for { 223 b, err := r.Peek(len(tokenDiff)) 224 if err != nil || bytes.HasPrefix(b, []byte(tokenOldFile)) || bytes.HasPrefix(b, []byte(tokenDiff)) { 225 break 226 } 227 line, _ := readline(r) 228 es = append(es, line) 229 } 230 } 231 return es 232 } 233 234 type hunkParser struct { 235 r *bufio.Reader 236 lnumdiff int 237 } 238 239 func (p *hunkParser) Parse() (*Hunk, error) { 240 if b, err := p.r.Peek(len(tokenStartHunk)); err != nil || !bytes.HasPrefix(b, []byte(tokenStartHunk)) { 241 return nil, nil 242 } 243 rangeline, _ := readline(p.r) 244 hr, err := parseHunkRange(rangeline) 245 if err != nil { 246 return nil, err 247 } 248 hunk := &Hunk{ 249 StartLineOld: hr.lold, 250 LineLengthOld: hr.sold, 251 StartLineNew: hr.lnew, 252 LineLengthNew: hr.snew, 253 Section: hr.section, 254 } 255 lold := hr.lold 256 lnew := hr.lnew 257 endhunk: 258 for !p.done(lold, lnew, hr) { 259 b, err := p.r.Peek(1) 260 if err != nil { 261 break 262 } 263 token := string(b) 264 switch token { 265 case tokenUnchangedLine, tokenAddedLine, tokenDeletedLine: 266 p.lnumdiff++ 267 l, _ := readline(p.r) 268 line := &Line{Content: l[len(token):]} // trim first token 269 switch token { 270 case tokenUnchangedLine: 271 line.Type = LineUnchanged 272 line.LnumDiff = p.lnumdiff 273 line.LnumOld = lold 274 line.LnumNew = lnew 275 lold++ 276 lnew++ 277 case tokenAddedLine: 278 line.Type = LineAdded 279 line.LnumDiff = p.lnumdiff 280 line.LnumNew = lnew 281 lnew++ 282 case tokenDeletedLine: 283 line.Type = LineDeleted 284 line.LnumDiff = p.lnumdiff 285 line.LnumOld = lold 286 lold++ 287 } 288 hunk.Lines = append(hunk.Lines, line) 289 case tokenNoNewlineAtEOF: 290 // skip \ No newline at end of file. just consume line 291 readline(p.r) 292 default: 293 break endhunk 294 } 295 } 296 p.lnumdiff++ // count up by an additional hunk 297 return hunk, nil 298 } 299 300 func (p *hunkParser) done(lold, lnew int, hr *hunkrange) bool { 301 end := lold >= hr.lold+hr.sold && lnew >= hr.lnew+hr.snew 302 if b, err := p.r.Peek(1); err != nil || (string(b) != tokenNoNewlineAtEOF && end) { 303 return true 304 } 305 return false 306 } 307 308 // @@ -l,s +l,s @@ optional section heading 309 type hunkrange struct { 310 lold, sold, lnew, snew int 311 section string 312 } 313 314 // @@ -lold[,sold] +lnew[,snew] @@[ section] 315 // 0 1 2 3 4 316 func parseHunkRange(rangeline string) (*hunkrange, error) { 317 ps := strings.SplitN(rangeline, " ", 5) 318 invalidErr := &ErrInvalidHunkRange{invalid: rangeline} 319 hunkrange := &hunkrange{} 320 if len(ps) < 4 || ps[0] != "@@" || ps[3] != "@@" { 321 return nil, invalidErr 322 } 323 old := ps[1] // -lold[,sold] 324 if !strings.HasPrefix(old, "-") { 325 return nil, invalidErr 326 } 327 lold, sold, err := parseLS(old[1:]) 328 if err != nil { 329 return nil, invalidErr 330 } 331 hunkrange.lold = lold 332 hunkrange.sold = sold 333 new := ps[2] // +lnew[,snew] 334 if !strings.HasPrefix(new, "+") { 335 return nil, invalidErr 336 } 337 lnew, snew, err := parseLS(new[1:]) 338 if err != nil { 339 return nil, invalidErr 340 } 341 hunkrange.lnew = lnew 342 hunkrange.snew = snew 343 if len(ps) == 5 { 344 hunkrange.section = ps[4] 345 } 346 return hunkrange, nil 347 } 348 349 // l[,s] 350 func parseLS(ls string) (l, s int, err error) { 351 ss := strings.SplitN(ls, ",", 2) 352 l, err = strconv.Atoi(ss[0]) 353 if err != nil { 354 return 0, 0, err 355 } 356 if len(ss) == 2 { 357 s, err = strconv.Atoi(ss[1]) 358 if err != nil { 359 return 0, 0, err 360 } 361 } else { 362 s = 1 363 } 364 return l, s, nil 365 } 366 367 // readline reads a whole line. 368 func readline(r *bufio.Reader) (string, error) { 369 line, isPrefix, err := r.ReadLine() 370 if err != nil { 371 return "", err 372 } 373 // append all remaining line content 374 if isPrefix { 375 l := make([]byte, len(line)) 376 copy(l, line) 377 for isPrefix { 378 line, isPrefix, err = r.ReadLine() 379 if err != nil { 380 return "", err 381 } 382 l = append(l, line...) 383 } 384 line = l 385 } 386 return string(line), nil 387 }