github.com/google/syzkaller@v0.0.0-20240517125934-c0f1611a36d6/pkg/vcs/git.go (about) 1 // Copyright 2017 syzkaller project authors. All rights reserved. 2 // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 3 4 package vcs 5 6 import ( 7 "bufio" 8 "bytes" 9 "errors" 10 "fmt" 11 "net/mail" 12 "os" 13 "os/exec" 14 "regexp" 15 "sort" 16 "strings" 17 "time" 18 19 "github.com/google/syzkaller/pkg/debugtracer" 20 "github.com/google/syzkaller/pkg/hash" 21 "github.com/google/syzkaller/pkg/log" 22 "github.com/google/syzkaller/pkg/osutil" 23 ) 24 25 type git struct { 26 dir string 27 ignoreCC map[string]bool 28 precious bool 29 sandbox bool 30 } 31 32 func newGit(dir string, ignoreCC map[string]bool, opts []RepoOpt) *git { 33 git := &git{ 34 dir: dir, 35 ignoreCC: ignoreCC, 36 sandbox: true, 37 } 38 for _, opt := range opts { 39 switch opt { 40 case OptPrecious: 41 git.precious = true 42 case OptDontSandbox: 43 git.sandbox = false 44 } 45 } 46 return git 47 } 48 49 func filterEnv() []string { 50 // We have to filter various git environment variables - if 51 // these variables are set (e.g. if a test is being run as 52 // part of a rebase) we're going to be acting on some other 53 // repository (e.g the syzkaller tree itself) rather than the 54 // intended repo. 55 env := os.Environ() 56 for i := 0; i < len(env); i++ { 57 if strings.HasPrefix(env[i], "GIT_DIR") || 58 strings.HasPrefix(env[i], "GIT_WORK_TREE") || 59 strings.HasPrefix(env[i], "GIT_INDEX_FILE") || 60 strings.HasPrefix(env[i], "GIT_OBJECT_DIRECTORY") { 61 env = append(env[:i], env[i+1:]...) 62 i-- 63 } 64 } 65 66 return env 67 } 68 69 func (git *git) Poll(repo, branch string) (*Commit, error) { 70 git.reset() 71 origin, err := git.git("remote", "get-url", "origin") 72 if err != nil || strings.TrimSpace(string(origin)) != repo { 73 // The repo is here, but it has wrong origin (e.g. repo in config has changed), re-clone. 74 if err := git.clone(repo, branch); err != nil { 75 return nil, err 76 } 77 } 78 // Use origin/branch for the case the branch was force-pushed, 79 // in such case branch is not the same is origin/branch and we will 80 // stuck with the local version forever (git checkout won't fail). 81 if _, err := git.git("checkout", "origin/"+branch); err != nil { 82 // No such branch (e.g. branch in config has changed), re-clone. 83 if err := git.clone(repo, branch); err != nil { 84 return nil, err 85 } 86 } 87 if _, err := git.git("fetch", "--force"); err != nil { 88 // Something else is wrong, re-clone. 89 if err := git.clone(repo, branch); err != nil { 90 return nil, err 91 } 92 } 93 if _, err := git.git("checkout", "origin/"+branch); err != nil { 94 return nil, err 95 } 96 if _, err := git.git("submodule", "update", "--init"); err != nil { 97 return nil, err 98 } 99 return git.HeadCommit() 100 } 101 102 func (git *git) CheckoutBranch(repo, branch string) (*Commit, error) { 103 if err := git.repair(); err != nil { 104 return nil, err 105 } 106 repoHash := hash.String([]byte(repo)) 107 // Because the HEAD is detached, submodules assumes "origin" to be the default 108 // remote when initializing. 109 // This sets "origin" to be the current remote. 110 // Ignore errors as we can double add or remove the same remote and that will fail. 111 git.git("remote", "rm", "origin") 112 git.git("remote", "add", "origin", repo) 113 git.git("remote", "add", repoHash, repo) 114 _, err := git.git("fetch", "--force", repoHash, branch) 115 if err != nil { 116 return nil, err 117 } 118 if _, err := git.git("checkout", "FETCH_HEAD", "--force"); err != nil { 119 return nil, err 120 } 121 if _, err := git.git("submodule", "update", "--init"); err != nil { 122 return nil, err 123 } 124 // If the branch checkout had to be "forced" the directory may 125 // contain remaining untracked files. 126 // Clean again to ensure the new branch is in a clean state. 127 if err := git.repair(); err != nil { 128 return nil, err 129 } 130 return git.HeadCommit() 131 } 132 133 func (git *git) CheckoutCommit(repo, commit string) (*Commit, error) { 134 if err := git.repair(); err != nil { 135 return nil, err 136 } 137 if err := git.fetchRemote(repo, commit); err != nil { 138 return nil, err 139 } 140 return git.SwitchCommit(commit) 141 } 142 143 func (git *git) fetchRemote(repo, commit string) error { 144 repoHash := hash.String([]byte(repo)) 145 // Ignore error as we can double add the same remote and that will fail. 146 git.git("remote", "add", repoHash, repo) 147 fetchArgs := []string{"fetch", "--force", "--tags", repoHash} 148 if commit != "" && gitFullHashRe.MatchString(commit) { 149 // This trick only works with full commit hashes. 150 fetchArgs = append(fetchArgs, commit) 151 } 152 _, err := git.git(fetchArgs...) 153 if err != nil { 154 var verbose *osutil.VerboseError 155 if errors.As(err, &verbose) && 156 bytes.Contains(verbose.Output, []byte("error: cannot lock ref")) { 157 // It can happen that the fetched repo has tags names that conflict 158 // with the ones already present in the repository. 159 // Try to fetch more, but this time prune tags, it should help. 160 // The --prune-tags option will remove all tags that are not present 161 // in this remote repo, so don't do it always. Only when necessary. 162 _, err = git.git("fetch", "--force", "--tags", "--prune", "--prune-tags", repoHash) 163 } 164 } 165 return err 166 } 167 168 func (git *git) SwitchCommit(commit string) (*Commit, error) { 169 if !git.precious { 170 git.git("reset", "--hard") 171 git.git("clean", "-fdx") 172 } 173 if _, err := git.git("checkout", commit); err != nil { 174 return nil, err 175 } 176 if _, err := git.git("submodule", "update", "--init"); err != nil { 177 return nil, err 178 } 179 return git.HeadCommit() 180 } 181 182 func (git *git) clone(repo, branch string) error { 183 if git.precious { 184 return fmt.Errorf("won't reinit precious repo") 185 } 186 if err := git.initRepo(nil); err != nil { 187 return err 188 } 189 if _, err := git.git("remote", "add", "origin", repo); err != nil { 190 return err 191 } 192 if _, err := git.git("fetch", "origin", branch); err != nil { 193 return err 194 } 195 return nil 196 } 197 198 func (git *git) reset() error { 199 // This function tries to reset git repo state to a known clean state. 200 if git.precious { 201 return nil 202 } 203 git.git("reset", "--hard", "--recurse-submodules") 204 git.git("clean", "-xfdf") 205 git.git("submodule", "foreach", "--recursive", "git", "clean", "-xfdf") 206 git.git("bisect", "reset") 207 _, err := git.git("reset", "--hard", "--recurse-submodules") 208 return err 209 } 210 211 func (git *git) repair() error { 212 if err := git.reset(); err != nil { 213 return git.initRepo(err) 214 } 215 return nil 216 } 217 218 func (git *git) initRepo(reason error) error { 219 if reason != nil { 220 log.Logf(1, "git: initializing repo at %v: %v", git.dir, reason) 221 } 222 if err := os.RemoveAll(git.dir); err != nil { 223 return fmt.Errorf("failed to remove repo dir: %w", err) 224 } 225 if err := osutil.MkdirAll(git.dir); err != nil { 226 return fmt.Errorf("failed to create repo dir: %w", err) 227 } 228 if git.sandbox { 229 if err := osutil.SandboxChown(git.dir); err != nil { 230 return err 231 } 232 } 233 if _, err := git.git("init"); err != nil { 234 return err 235 } 236 return nil 237 } 238 239 func (git *git) Contains(commit string) (bool, error) { 240 _, err := git.git("merge-base", "--is-ancestor", commit, "HEAD") 241 return err == nil, nil 242 } 243 244 func (git *git) HeadCommit() (*Commit, error) { 245 return git.getCommit("HEAD") 246 } 247 248 func (git *git) getCommit(commit string) (*Commit, error) { 249 output, err := git.git("log", "--format=%H%n%s%n%ae%n%an%n%ad%n%P%n%cd%n%b", "-n", "1", commit) 250 if err != nil { 251 return nil, err 252 } 253 return gitParseCommit(output, nil, nil, git.ignoreCC) 254 } 255 256 func gitParseCommit(output, user, domain []byte, ignoreCC map[string]bool) (*Commit, error) { 257 lines := bytes.Split(output, []byte{'\n'}) 258 if len(lines) < 8 || len(lines[0]) != 40 { 259 return nil, fmt.Errorf("unexpected git log output: %q", output) 260 } 261 const dateFormat = "Mon Jan 2 15:04:05 2006 -0700" 262 date, err := time.Parse(dateFormat, string(lines[4])) 263 if err != nil { 264 return nil, fmt.Errorf("failed to parse date in git log output: %w\n%q", err, output) 265 } 266 commitDate, err := time.Parse(dateFormat, string(lines[6])) 267 if err != nil { 268 return nil, fmt.Errorf("failed to parse date in git log output: %w\n%q", err, output) 269 } 270 recipients := make(map[string]bool) 271 recipients[strings.ToLower(string(lines[2]))] = true 272 var tags []string 273 // Use summary line + all description lines. 274 for _, line := range append([][]byte{lines[1]}, lines[7:]...) { 275 if user != nil { 276 userPos := bytes.Index(line, user) 277 if userPos != -1 { 278 domainPos := bytes.Index(line[userPos+len(user)+1:], domain) 279 if domainPos != -1 { 280 startPos := userPos + len(user) 281 endPos := userPos + len(user) + domainPos + 1 282 tag := string(line[startPos:endPos]) 283 present := false 284 for _, tag1 := range tags { 285 if tag1 == tag { 286 present = true 287 break 288 } 289 } 290 if !present { 291 tags = append(tags, tag) 292 } 293 } 294 } 295 } 296 for _, re := range ccRes { 297 matches := re.FindSubmatchIndex(line) 298 if matches == nil { 299 continue 300 } 301 addr, err := mail.ParseAddress(string(line[matches[2]:matches[3]])) 302 if err != nil { 303 break 304 } 305 email := strings.ToLower(addr.Address) 306 if ignoreCC[email] { 307 continue 308 } 309 recipients[email] = true 310 break 311 } 312 } 313 sortedRecipients := make(Recipients, 0, len(recipients)) 314 for addr := range recipients { 315 sortedRecipients = append(sortedRecipients, RecipientInfo{mail.Address{Address: addr}, To}) 316 } 317 sort.Sort(sortedRecipients) 318 parents := strings.Split(string(lines[5]), " ") 319 com := &Commit{ 320 Hash: string(lines[0]), 321 Title: string(lines[1]), 322 Author: string(lines[2]), 323 AuthorName: string(lines[3]), 324 Parents: parents, 325 Recipients: sortedRecipients, 326 Tags: tags, 327 Date: date, 328 CommitDate: commitDate, 329 } 330 return com, nil 331 } 332 333 func (git *git) GetCommitByTitle(title string) (*Commit, error) { 334 commits, _, err := git.GetCommitsByTitles([]string{title}) 335 if err != nil || len(commits) == 0 { 336 return nil, err 337 } 338 return commits[0], nil 339 } 340 341 const ( 342 fetchCommitsMaxAgeInYears = 5 343 ) 344 345 func (git *git) GetCommitsByTitles(titles []string) ([]*Commit, []string, error) { 346 var greps []string 347 m := make(map[string]string) 348 for _, title := range titles { 349 canonical := CanonicalizeCommit(title) 350 greps = append(greps, canonical) 351 m[canonical] = title 352 } 353 since := time.Now().Add(-time.Hour * 24 * 365 * fetchCommitsMaxAgeInYears).Format("01-02-2006") 354 commits, err := git.fetchCommits(since, "HEAD", "", "", greps, true) 355 if err != nil { 356 return nil, nil, err 357 } 358 var results []*Commit 359 for _, com := range commits { 360 canonical := CanonicalizeCommit(com.Title) 361 if orig := m[canonical]; orig != "" { 362 delete(m, canonical) 363 results = append(results, com) 364 com.Title = orig 365 } 366 } 367 var missing []string 368 for _, orig := range m { 369 missing = append(missing, orig) 370 } 371 return results, missing, nil 372 } 373 374 func (git *git) ListCommitHashes(baseCommit string) ([]string, error) { 375 output, err := git.git("log", "--pretty=format:%h", baseCommit) 376 if err != nil { 377 return nil, err 378 } 379 return strings.Split(string(output), "\n"), nil 380 } 381 382 func (git *git) ExtractFixTagsFromCommits(baseCommit, email string) ([]*Commit, error) { 383 user, domain, err := splitEmail(email) 384 if err != nil { 385 return nil, fmt.Errorf("failed to parse email %q: %w", email, err) 386 } 387 grep := user + "+.*" + domain 388 since := time.Now().Add(-time.Hour * 24 * 365 * fetchCommitsMaxAgeInYears).Format("01-02-2006") 389 return git.fetchCommits(since, baseCommit, user, domain, []string{grep}, false) 390 } 391 392 func (git *git) fetchCommits(since, base, user, domain string, greps []string, fixedStrings bool) ([]*Commit, error) { 393 const commitSeparator = "---===syzkaller-commit-separator===---" 394 args := []string{"log", "--since", since, "--format=%H%n%s%n%ae%n%an%n%ad%n%P%n%cd%n%b%n" + commitSeparator} 395 if fixedStrings { 396 args = append(args, "--fixed-strings") 397 } 398 for _, grep := range greps { 399 args = append(args, "--grep", grep) 400 } 401 args = append(args, base) 402 cmd := exec.Command("git", args...) 403 cmd.Dir = git.dir 404 cmd.Env = filterEnv() 405 if git.sandbox { 406 if err := osutil.Sandbox(cmd, true, false); err != nil { 407 return nil, err 408 } 409 } 410 stdout, err := cmd.StdoutPipe() 411 if err != nil { 412 return nil, err 413 } 414 if err := cmd.Start(); err != nil { 415 return nil, err 416 } 417 defer cmd.Wait() 418 defer cmd.Process.Kill() 419 var ( 420 s = bufio.NewScanner(stdout) 421 buf = new(bytes.Buffer) 422 separator = []byte(commitSeparator) 423 commits []*Commit 424 userBytes []byte 425 domainBytes []byte 426 ) 427 if user != "" { 428 userBytes = []byte(user + "+") 429 domainBytes = []byte(domain) 430 } 431 for s.Scan() { 432 ln := s.Bytes() 433 if !bytes.Equal(ln, separator) { 434 buf.Write(ln) 435 buf.WriteByte('\n') 436 continue 437 } 438 com, err := gitParseCommit(buf.Bytes(), userBytes, domainBytes, git.ignoreCC) 439 if err != nil { 440 return nil, err 441 } 442 if user == "" || len(com.Tags) != 0 { 443 commits = append(commits, com) 444 } 445 buf.Reset() 446 } 447 return commits, s.Err() 448 } 449 450 func (git *git) git(args ...string) ([]byte, error) { 451 cmd := osutil.Command("git", args...) 452 cmd.Dir = git.dir 453 cmd.Env = filterEnv() 454 if git.sandbox { 455 if err := osutil.Sandbox(cmd, true, false); err != nil { 456 return nil, err 457 } 458 } 459 return osutil.Run(3*time.Hour, cmd) 460 } 461 462 func splitEmail(email string) (user, domain string, err error) { 463 addr, err := mail.ParseAddress(email) 464 if err != nil { 465 return "", "", err 466 } 467 at := strings.IndexByte(addr.Address, '@') 468 if at == -1 { 469 return "", "", fmt.Errorf("no @ in email address") 470 } 471 user = addr.Address[:at] 472 domain = addr.Address[at:] 473 if plus := strings.IndexByte(user, '+'); plus != -1 { 474 user = user[:plus] 475 } 476 return 477 } 478 479 func (git *git) Bisect(bad, good string, dt debugtracer.DebugTracer, pred func() (BisectResult, 480 error)) ([]*Commit, error) { 481 git.reset() 482 firstBad, err := git.getCommit(bad) 483 if err != nil { 484 return nil, err 485 } 486 output, err := git.git("bisect", "start", bad, good) 487 if err != nil { 488 return nil, err 489 } 490 defer git.reset() 491 dt.Log("# git bisect start %v %v\n%s", bad, good, output) 492 current, err := git.HeadCommit() 493 if err != nil { 494 return nil, err 495 } 496 var bisectTerms = [...]string{ 497 BisectBad: "bad", 498 BisectGood: "good", 499 BisectSkip: "skip", 500 } 501 for { 502 res, err := pred() 503 // Linux EnvForCommit may cherry-pick some fixes, reset these before the next step. 504 git.git("reset", "--hard") 505 if err != nil { 506 return nil, err 507 } 508 if res == BisectBad { 509 firstBad = current 510 } 511 output, err = git.git("bisect", bisectTerms[res]) 512 dt.Log("# git bisect %v %v\n%s", bisectTerms[res], current.Hash, output) 513 if err != nil { 514 if bytes.Contains(output, []byte("There are only 'skip'ped commits left to test")) { 515 return git.bisectInconclusive(output) 516 } 517 return nil, err 518 } 519 next, err := git.HeadCommit() 520 if err != nil { 521 return nil, err 522 } 523 if current.Hash == next.Hash { 524 return []*Commit{firstBad}, nil 525 } 526 current = next 527 } 528 } 529 530 var gitFullHashRe = regexp.MustCompile("[a-f0-9]{40}") 531 532 func (git *git) bisectInconclusive(output []byte) ([]*Commit, error) { 533 // For inconclusive bisection git prints the following message: 534 // 535 // There are only 'skip'ped commits left to test. 536 // The first bad commit could be any of: 537 // 1f43f400a2cbb02f3d34de8fe30075c070254816 538 // 4d96e13ee9cd1f7f801e8c7f4b12f09d1da4a5d8 539 // 5cd856a5ef9aa189df757c322be34ad735a5b17f 540 // We cannot bisect more! 541 // 542 // For conclusive bisection: 543 // 544 // 7c3850adbcccc2c6c9e7ab23a7dcbc4926ee5b96 is the first bad commit 545 var commits []*Commit 546 for _, hash := range gitFullHashRe.FindAll(output, -1) { 547 com, err := git.getCommit(string(hash)) 548 if err != nil { 549 return nil, err 550 } 551 commits = append(commits, com) 552 } 553 return commits, nil 554 } 555 556 func (git *git) ReleaseTag(commit string) (string, error) { 557 tags, err := git.previousReleaseTags(commit, true, true, true) 558 if err != nil { 559 return "", err 560 } 561 if len(tags) == 0 { 562 return "", fmt.Errorf("no release tags found for commit %v", commit) 563 } 564 return tags[0], nil 565 } 566 567 func (git *git) previousReleaseTags(commit string, self, onlyTop, includeRC bool) ([]string, error) { 568 var tags []string 569 if self { 570 output, err := git.git("tag", "--list", "--points-at", commit, "--merged", commit, "v*.*") 571 if err != nil { 572 return nil, err 573 } 574 tags = gitParseReleaseTags(output, includeRC) 575 if onlyTop && len(tags) != 0 { 576 return tags, nil 577 } 578 } 579 output, err := git.git("tag", "--no-contains", commit, "--merged", commit, "v*.*") 580 if err != nil { 581 return nil, err 582 } 583 tags1 := gitParseReleaseTags(output, includeRC) 584 tags = append(tags, tags1...) 585 if len(tags) == 0 { 586 return nil, fmt.Errorf("no release tags found for commit %v", commit) 587 } 588 return tags, nil 589 } 590 591 func (git *git) IsRelease(commit string) (bool, error) { 592 tags1, err := git.previousReleaseTags(commit, true, false, false) 593 if err != nil { 594 return false, err 595 } 596 tags2, err := git.previousReleaseTags(commit, false, false, false) 597 if err != nil { 598 return false, err 599 } 600 return len(tags1) != len(tags2), nil 601 } 602 603 func (git *git) Object(name, commit string) ([]byte, error) { 604 return git.git("show", fmt.Sprintf("%s:%s", commit, name)) 605 } 606 607 func (git *git) MergeBases(firstCommit, secondCommit string) ([]*Commit, error) { 608 output, err := git.git("merge-base", firstCommit, secondCommit) 609 if err != nil { 610 return nil, err 611 } 612 ret := []*Commit{} 613 for _, hash := range strings.Fields(string(output)) { 614 commit, err := git.getCommit(hash) 615 if err != nil { 616 return nil, err 617 } 618 ret = append(ret, commit) 619 } 620 return ret, nil 621 }