github.com/google/syzkaller@v0.0.0-20240517125934-c0f1611a36d6/pkg/vcs/git.go (about)

     1  // Copyright 2017 syzkaller project authors. All rights reserved.
     2  // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
     3  
     4  package vcs
     5  
     6  import (
     7  	"bufio"
     8  	"bytes"
     9  	"errors"
    10  	"fmt"
    11  	"net/mail"
    12  	"os"
    13  	"os/exec"
    14  	"regexp"
    15  	"sort"
    16  	"strings"
    17  	"time"
    18  
    19  	"github.com/google/syzkaller/pkg/debugtracer"
    20  	"github.com/google/syzkaller/pkg/hash"
    21  	"github.com/google/syzkaller/pkg/log"
    22  	"github.com/google/syzkaller/pkg/osutil"
    23  )
    24  
// git drives a repository by running the git command-line tool inside dir.
type git struct {
	// dir is used as the working directory of every spawned git command.
	dir      string
	// ignoreCC is consulted with lower-cased email addresses while parsing
	// commits; addresses present in the map are not added to the recipients.
	ignoreCC map[string]bool
	// precious, when set, makes reset a no-op, makes SwitchCommit skip its
	// reset/clean step, and makes clone refuse to re-initialize the repo.
	precious bool
	// sandbox, when set, wraps git commands with osutil.Sandbox and chowns
	// a freshly initialized repo dir via osutil.SandboxChown.
	sandbox  bool
}
    31  
    32  func newGit(dir string, ignoreCC map[string]bool, opts []RepoOpt) *git {
    33  	git := &git{
    34  		dir:      dir,
    35  		ignoreCC: ignoreCC,
    36  		sandbox:  true,
    37  	}
    38  	for _, opt := range opts {
    39  		switch opt {
    40  		case OptPrecious:
    41  			git.precious = true
    42  		case OptDontSandbox:
    43  			git.sandbox = false
    44  		}
    45  	}
    46  	return git
    47  }
    48  
    49  func filterEnv() []string {
    50  	// We have to filter various git environment variables - if
    51  	// these variables are set (e.g. if a test is being run as
    52  	// part of a rebase) we're going to be acting on some other
    53  	// repository (e.g the syzkaller tree itself) rather than the
    54  	// intended repo.
    55  	env := os.Environ()
    56  	for i := 0; i < len(env); i++ {
    57  		if strings.HasPrefix(env[i], "GIT_DIR") ||
    58  			strings.HasPrefix(env[i], "GIT_WORK_TREE") ||
    59  			strings.HasPrefix(env[i], "GIT_INDEX_FILE") ||
    60  			strings.HasPrefix(env[i], "GIT_OBJECT_DIRECTORY") {
    61  			env = append(env[:i], env[i+1:]...)
    62  			i--
    63  		}
    64  	}
    65  
    66  	return env
    67  }
    68  
    69  func (git *git) Poll(repo, branch string) (*Commit, error) {
    70  	git.reset()
    71  	origin, err := git.git("remote", "get-url", "origin")
    72  	if err != nil || strings.TrimSpace(string(origin)) != repo {
    73  		// The repo is here, but it has wrong origin (e.g. repo in config has changed), re-clone.
    74  		if err := git.clone(repo, branch); err != nil {
    75  			return nil, err
    76  		}
    77  	}
    78  	// Use origin/branch for the case the branch was force-pushed,
    79  	// in such case branch is not the same is origin/branch and we will
    80  	// stuck with the local version forever (git checkout won't fail).
    81  	if _, err := git.git("checkout", "origin/"+branch); err != nil {
    82  		// No such branch (e.g. branch in config has changed), re-clone.
    83  		if err := git.clone(repo, branch); err != nil {
    84  			return nil, err
    85  		}
    86  	}
    87  	if _, err := git.git("fetch", "--force"); err != nil {
    88  		// Something else is wrong, re-clone.
    89  		if err := git.clone(repo, branch); err != nil {
    90  			return nil, err
    91  		}
    92  	}
    93  	if _, err := git.git("checkout", "origin/"+branch); err != nil {
    94  		return nil, err
    95  	}
    96  	if _, err := git.git("submodule", "update", "--init"); err != nil {
    97  		return nil, err
    98  	}
    99  	return git.HeadCommit()
   100  }
   101  
   102  func (git *git) CheckoutBranch(repo, branch string) (*Commit, error) {
   103  	if err := git.repair(); err != nil {
   104  		return nil, err
   105  	}
   106  	repoHash := hash.String([]byte(repo))
   107  	// Because the HEAD is detached, submodules assumes "origin" to be the default
   108  	// remote when initializing.
   109  	// This sets "origin" to be the current remote.
   110  	// Ignore errors as we can double add or remove the same remote and that will fail.
   111  	git.git("remote", "rm", "origin")
   112  	git.git("remote", "add", "origin", repo)
   113  	git.git("remote", "add", repoHash, repo)
   114  	_, err := git.git("fetch", "--force", repoHash, branch)
   115  	if err != nil {
   116  		return nil, err
   117  	}
   118  	if _, err := git.git("checkout", "FETCH_HEAD", "--force"); err != nil {
   119  		return nil, err
   120  	}
   121  	if _, err := git.git("submodule", "update", "--init"); err != nil {
   122  		return nil, err
   123  	}
   124  	// If the branch checkout had to be "forced" the directory may
   125  	// contain remaining untracked files.
   126  	// Clean again to ensure the new branch is in a clean state.
   127  	if err := git.repair(); err != nil {
   128  		return nil, err
   129  	}
   130  	return git.HeadCommit()
   131  }
   132  
   133  func (git *git) CheckoutCommit(repo, commit string) (*Commit, error) {
   134  	if err := git.repair(); err != nil {
   135  		return nil, err
   136  	}
   137  	if err := git.fetchRemote(repo, commit); err != nil {
   138  		return nil, err
   139  	}
   140  	return git.SwitchCommit(commit)
   141  }
   142  
   143  func (git *git) fetchRemote(repo, commit string) error {
   144  	repoHash := hash.String([]byte(repo))
   145  	// Ignore error as we can double add the same remote and that will fail.
   146  	git.git("remote", "add", repoHash, repo)
   147  	fetchArgs := []string{"fetch", "--force", "--tags", repoHash}
   148  	if commit != "" && gitFullHashRe.MatchString(commit) {
   149  		// This trick only works with full commit hashes.
   150  		fetchArgs = append(fetchArgs, commit)
   151  	}
   152  	_, err := git.git(fetchArgs...)
   153  	if err != nil {
   154  		var verbose *osutil.VerboseError
   155  		if errors.As(err, &verbose) &&
   156  			bytes.Contains(verbose.Output, []byte("error: cannot lock ref")) {
   157  			// It can happen that the fetched repo has tags names that conflict
   158  			// with the ones already present in the repository.
   159  			// Try to fetch more, but this time prune tags, it should help.
   160  			// The --prune-tags option will remove all tags that are not present
   161  			// in this remote repo, so don't do it always. Only when necessary.
   162  			_, err = git.git("fetch", "--force", "--tags", "--prune", "--prune-tags", repoHash)
   163  		}
   164  	}
   165  	return err
   166  }
   167  
   168  func (git *git) SwitchCommit(commit string) (*Commit, error) {
   169  	if !git.precious {
   170  		git.git("reset", "--hard")
   171  		git.git("clean", "-fdx")
   172  	}
   173  	if _, err := git.git("checkout", commit); err != nil {
   174  		return nil, err
   175  	}
   176  	if _, err := git.git("submodule", "update", "--init"); err != nil {
   177  		return nil, err
   178  	}
   179  	return git.HeadCommit()
   180  }
   181  
   182  func (git *git) clone(repo, branch string) error {
   183  	if git.precious {
   184  		return fmt.Errorf("won't reinit precious repo")
   185  	}
   186  	if err := git.initRepo(nil); err != nil {
   187  		return err
   188  	}
   189  	if _, err := git.git("remote", "add", "origin", repo); err != nil {
   190  		return err
   191  	}
   192  	if _, err := git.git("fetch", "origin", branch); err != nil {
   193  		return err
   194  	}
   195  	return nil
   196  }
   197  
   198  func (git *git) reset() error {
   199  	// This function tries to reset git repo state to a known clean state.
   200  	if git.precious {
   201  		return nil
   202  	}
   203  	git.git("reset", "--hard", "--recurse-submodules")
   204  	git.git("clean", "-xfdf")
   205  	git.git("submodule", "foreach", "--recursive", "git", "clean", "-xfdf")
   206  	git.git("bisect", "reset")
   207  	_, err := git.git("reset", "--hard", "--recurse-submodules")
   208  	return err
   209  }
   210  
   211  func (git *git) repair() error {
   212  	if err := git.reset(); err != nil {
   213  		return git.initRepo(err)
   214  	}
   215  	return nil
   216  }
   217  
   218  func (git *git) initRepo(reason error) error {
   219  	if reason != nil {
   220  		log.Logf(1, "git: initializing repo at %v: %v", git.dir, reason)
   221  	}
   222  	if err := os.RemoveAll(git.dir); err != nil {
   223  		return fmt.Errorf("failed to remove repo dir: %w", err)
   224  	}
   225  	if err := osutil.MkdirAll(git.dir); err != nil {
   226  		return fmt.Errorf("failed to create repo dir: %w", err)
   227  	}
   228  	if git.sandbox {
   229  		if err := osutil.SandboxChown(git.dir); err != nil {
   230  			return err
   231  		}
   232  	}
   233  	if _, err := git.git("init"); err != nil {
   234  		return err
   235  	}
   236  	return nil
   237  }
   238  
   239  func (git *git) Contains(commit string) (bool, error) {
   240  	_, err := git.git("merge-base", "--is-ancestor", commit, "HEAD")
   241  	return err == nil, nil
   242  }
   243  
   244  func (git *git) HeadCommit() (*Commit, error) {
   245  	return git.getCommit("HEAD")
   246  }
   247  
   248  func (git *git) getCommit(commit string) (*Commit, error) {
   249  	output, err := git.git("log", "--format=%H%n%s%n%ae%n%an%n%ad%n%P%n%cd%n%b", "-n", "1", commit)
   250  	if err != nil {
   251  		return nil, err
   252  	}
   253  	return gitParseCommit(output, nil, nil, git.ignoreCC)
   254  }
   255  
   256  func gitParseCommit(output, user, domain []byte, ignoreCC map[string]bool) (*Commit, error) {
   257  	lines := bytes.Split(output, []byte{'\n'})
   258  	if len(lines) < 8 || len(lines[0]) != 40 {
   259  		return nil, fmt.Errorf("unexpected git log output: %q", output)
   260  	}
   261  	const dateFormat = "Mon Jan 2 15:04:05 2006 -0700"
   262  	date, err := time.Parse(dateFormat, string(lines[4]))
   263  	if err != nil {
   264  		return nil, fmt.Errorf("failed to parse date in git log output: %w\n%q", err, output)
   265  	}
   266  	commitDate, err := time.Parse(dateFormat, string(lines[6]))
   267  	if err != nil {
   268  		return nil, fmt.Errorf("failed to parse date in git log output: %w\n%q", err, output)
   269  	}
   270  	recipients := make(map[string]bool)
   271  	recipients[strings.ToLower(string(lines[2]))] = true
   272  	var tags []string
   273  	// Use summary line + all description lines.
   274  	for _, line := range append([][]byte{lines[1]}, lines[7:]...) {
   275  		if user != nil {
   276  			userPos := bytes.Index(line, user)
   277  			if userPos != -1 {
   278  				domainPos := bytes.Index(line[userPos+len(user)+1:], domain)
   279  				if domainPos != -1 {
   280  					startPos := userPos + len(user)
   281  					endPos := userPos + len(user) + domainPos + 1
   282  					tag := string(line[startPos:endPos])
   283  					present := false
   284  					for _, tag1 := range tags {
   285  						if tag1 == tag {
   286  							present = true
   287  							break
   288  						}
   289  					}
   290  					if !present {
   291  						tags = append(tags, tag)
   292  					}
   293  				}
   294  			}
   295  		}
   296  		for _, re := range ccRes {
   297  			matches := re.FindSubmatchIndex(line)
   298  			if matches == nil {
   299  				continue
   300  			}
   301  			addr, err := mail.ParseAddress(string(line[matches[2]:matches[3]]))
   302  			if err != nil {
   303  				break
   304  			}
   305  			email := strings.ToLower(addr.Address)
   306  			if ignoreCC[email] {
   307  				continue
   308  			}
   309  			recipients[email] = true
   310  			break
   311  		}
   312  	}
   313  	sortedRecipients := make(Recipients, 0, len(recipients))
   314  	for addr := range recipients {
   315  		sortedRecipients = append(sortedRecipients, RecipientInfo{mail.Address{Address: addr}, To})
   316  	}
   317  	sort.Sort(sortedRecipients)
   318  	parents := strings.Split(string(lines[5]), " ")
   319  	com := &Commit{
   320  		Hash:       string(lines[0]),
   321  		Title:      string(lines[1]),
   322  		Author:     string(lines[2]),
   323  		AuthorName: string(lines[3]),
   324  		Parents:    parents,
   325  		Recipients: sortedRecipients,
   326  		Tags:       tags,
   327  		Date:       date,
   328  		CommitDate: commitDate,
   329  	}
   330  	return com, nil
   331  }
   332  
   333  func (git *git) GetCommitByTitle(title string) (*Commit, error) {
   334  	commits, _, err := git.GetCommitsByTitles([]string{title})
   335  	if err != nil || len(commits) == 0 {
   336  		return nil, err
   337  	}
   338  	return commits[0], nil
   339  }
   340  
   341  const (
   342  	fetchCommitsMaxAgeInYears = 5
   343  )
   344  
   345  func (git *git) GetCommitsByTitles(titles []string) ([]*Commit, []string, error) {
   346  	var greps []string
   347  	m := make(map[string]string)
   348  	for _, title := range titles {
   349  		canonical := CanonicalizeCommit(title)
   350  		greps = append(greps, canonical)
   351  		m[canonical] = title
   352  	}
   353  	since := time.Now().Add(-time.Hour * 24 * 365 * fetchCommitsMaxAgeInYears).Format("01-02-2006")
   354  	commits, err := git.fetchCommits(since, "HEAD", "", "", greps, true)
   355  	if err != nil {
   356  		return nil, nil, err
   357  	}
   358  	var results []*Commit
   359  	for _, com := range commits {
   360  		canonical := CanonicalizeCommit(com.Title)
   361  		if orig := m[canonical]; orig != "" {
   362  			delete(m, canonical)
   363  			results = append(results, com)
   364  			com.Title = orig
   365  		}
   366  	}
   367  	var missing []string
   368  	for _, orig := range m {
   369  		missing = append(missing, orig)
   370  	}
   371  	return results, missing, nil
   372  }
   373  
   374  func (git *git) ListCommitHashes(baseCommit string) ([]string, error) {
   375  	output, err := git.git("log", "--pretty=format:%h", baseCommit)
   376  	if err != nil {
   377  		return nil, err
   378  	}
   379  	return strings.Split(string(output), "\n"), nil
   380  }
   381  
   382  func (git *git) ExtractFixTagsFromCommits(baseCommit, email string) ([]*Commit, error) {
   383  	user, domain, err := splitEmail(email)
   384  	if err != nil {
   385  		return nil, fmt.Errorf("failed to parse email %q: %w", email, err)
   386  	}
   387  	grep := user + "+.*" + domain
   388  	since := time.Now().Add(-time.Hour * 24 * 365 * fetchCommitsMaxAgeInYears).Format("01-02-2006")
   389  	return git.fetchCommits(since, baseCommit, user, domain, []string{grep}, false)
   390  }
   391  
   392  func (git *git) fetchCommits(since, base, user, domain string, greps []string, fixedStrings bool) ([]*Commit, error) {
   393  	const commitSeparator = "---===syzkaller-commit-separator===---"
   394  	args := []string{"log", "--since", since, "--format=%H%n%s%n%ae%n%an%n%ad%n%P%n%cd%n%b%n" + commitSeparator}
   395  	if fixedStrings {
   396  		args = append(args, "--fixed-strings")
   397  	}
   398  	for _, grep := range greps {
   399  		args = append(args, "--grep", grep)
   400  	}
   401  	args = append(args, base)
   402  	cmd := exec.Command("git", args...)
   403  	cmd.Dir = git.dir
   404  	cmd.Env = filterEnv()
   405  	if git.sandbox {
   406  		if err := osutil.Sandbox(cmd, true, false); err != nil {
   407  			return nil, err
   408  		}
   409  	}
   410  	stdout, err := cmd.StdoutPipe()
   411  	if err != nil {
   412  		return nil, err
   413  	}
   414  	if err := cmd.Start(); err != nil {
   415  		return nil, err
   416  	}
   417  	defer cmd.Wait()
   418  	defer cmd.Process.Kill()
   419  	var (
   420  		s           = bufio.NewScanner(stdout)
   421  		buf         = new(bytes.Buffer)
   422  		separator   = []byte(commitSeparator)
   423  		commits     []*Commit
   424  		userBytes   []byte
   425  		domainBytes []byte
   426  	)
   427  	if user != "" {
   428  		userBytes = []byte(user + "+")
   429  		domainBytes = []byte(domain)
   430  	}
   431  	for s.Scan() {
   432  		ln := s.Bytes()
   433  		if !bytes.Equal(ln, separator) {
   434  			buf.Write(ln)
   435  			buf.WriteByte('\n')
   436  			continue
   437  		}
   438  		com, err := gitParseCommit(buf.Bytes(), userBytes, domainBytes, git.ignoreCC)
   439  		if err != nil {
   440  			return nil, err
   441  		}
   442  		if user == "" || len(com.Tags) != 0 {
   443  			commits = append(commits, com)
   444  		}
   445  		buf.Reset()
   446  	}
   447  	return commits, s.Err()
   448  }
   449  
   450  func (git *git) git(args ...string) ([]byte, error) {
   451  	cmd := osutil.Command("git", args...)
   452  	cmd.Dir = git.dir
   453  	cmd.Env = filterEnv()
   454  	if git.sandbox {
   455  		if err := osutil.Sandbox(cmd, true, false); err != nil {
   456  			return nil, err
   457  		}
   458  	}
   459  	return osutil.Run(3*time.Hour, cmd)
   460  }
   461  
   462  func splitEmail(email string) (user, domain string, err error) {
   463  	addr, err := mail.ParseAddress(email)
   464  	if err != nil {
   465  		return "", "", err
   466  	}
   467  	at := strings.IndexByte(addr.Address, '@')
   468  	if at == -1 {
   469  		return "", "", fmt.Errorf("no @ in email address")
   470  	}
   471  	user = addr.Address[:at]
   472  	domain = addr.Address[at:]
   473  	if plus := strings.IndexByte(user, '+'); plus != -1 {
   474  		user = user[:plus]
   475  	}
   476  	return
   477  }
   478  
   479  func (git *git) Bisect(bad, good string, dt debugtracer.DebugTracer, pred func() (BisectResult,
   480  	error)) ([]*Commit, error) {
   481  	git.reset()
   482  	firstBad, err := git.getCommit(bad)
   483  	if err != nil {
   484  		return nil, err
   485  	}
   486  	output, err := git.git("bisect", "start", bad, good)
   487  	if err != nil {
   488  		return nil, err
   489  	}
   490  	defer git.reset()
   491  	dt.Log("# git bisect start %v %v\n%s", bad, good, output)
   492  	current, err := git.HeadCommit()
   493  	if err != nil {
   494  		return nil, err
   495  	}
   496  	var bisectTerms = [...]string{
   497  		BisectBad:  "bad",
   498  		BisectGood: "good",
   499  		BisectSkip: "skip",
   500  	}
   501  	for {
   502  		res, err := pred()
   503  		// Linux EnvForCommit may cherry-pick some fixes, reset these before the next step.
   504  		git.git("reset", "--hard")
   505  		if err != nil {
   506  			return nil, err
   507  		}
   508  		if res == BisectBad {
   509  			firstBad = current
   510  		}
   511  		output, err = git.git("bisect", bisectTerms[res])
   512  		dt.Log("# git bisect %v %v\n%s", bisectTerms[res], current.Hash, output)
   513  		if err != nil {
   514  			if bytes.Contains(output, []byte("There are only 'skip'ped commits left to test")) {
   515  				return git.bisectInconclusive(output)
   516  			}
   517  			return nil, err
   518  		}
   519  		next, err := git.HeadCommit()
   520  		if err != nil {
   521  			return nil, err
   522  		}
   523  		if current.Hash == next.Hash {
   524  			return []*Commit{firstBad}, nil
   525  		}
   526  		current = next
   527  	}
   528  }
   529  
   530  var gitFullHashRe = regexp.MustCompile("[a-f0-9]{40}")
   531  
   532  func (git *git) bisectInconclusive(output []byte) ([]*Commit, error) {
   533  	// For inconclusive bisection git prints the following message:
   534  	//
   535  	//	There are only 'skip'ped commits left to test.
   536  	//	The first bad commit could be any of:
   537  	//	1f43f400a2cbb02f3d34de8fe30075c070254816
   538  	//	4d96e13ee9cd1f7f801e8c7f4b12f09d1da4a5d8
   539  	//	5cd856a5ef9aa189df757c322be34ad735a5b17f
   540  	//	We cannot bisect more!
   541  	//
   542  	// For conclusive bisection:
   543  	//
   544  	//	7c3850adbcccc2c6c9e7ab23a7dcbc4926ee5b96 is the first bad commit
   545  	var commits []*Commit
   546  	for _, hash := range gitFullHashRe.FindAll(output, -1) {
   547  		com, err := git.getCommit(string(hash))
   548  		if err != nil {
   549  			return nil, err
   550  		}
   551  		commits = append(commits, com)
   552  	}
   553  	return commits, nil
   554  }
   555  
   556  func (git *git) ReleaseTag(commit string) (string, error) {
   557  	tags, err := git.previousReleaseTags(commit, true, true, true)
   558  	if err != nil {
   559  		return "", err
   560  	}
   561  	if len(tags) == 0 {
   562  		return "", fmt.Errorf("no release tags found for commit %v", commit)
   563  	}
   564  	return tags[0], nil
   565  }
   566  
   567  func (git *git) previousReleaseTags(commit string, self, onlyTop, includeRC bool) ([]string, error) {
   568  	var tags []string
   569  	if self {
   570  		output, err := git.git("tag", "--list", "--points-at", commit, "--merged", commit, "v*.*")
   571  		if err != nil {
   572  			return nil, err
   573  		}
   574  		tags = gitParseReleaseTags(output, includeRC)
   575  		if onlyTop && len(tags) != 0 {
   576  			return tags, nil
   577  		}
   578  	}
   579  	output, err := git.git("tag", "--no-contains", commit, "--merged", commit, "v*.*")
   580  	if err != nil {
   581  		return nil, err
   582  	}
   583  	tags1 := gitParseReleaseTags(output, includeRC)
   584  	tags = append(tags, tags1...)
   585  	if len(tags) == 0 {
   586  		return nil, fmt.Errorf("no release tags found for commit %v", commit)
   587  	}
   588  	return tags, nil
   589  }
   590  
   591  func (git *git) IsRelease(commit string) (bool, error) {
   592  	tags1, err := git.previousReleaseTags(commit, true, false, false)
   593  	if err != nil {
   594  		return false, err
   595  	}
   596  	tags2, err := git.previousReleaseTags(commit, false, false, false)
   597  	if err != nil {
   598  		return false, err
   599  	}
   600  	return len(tags1) != len(tags2), nil
   601  }
   602  
   603  func (git *git) Object(name, commit string) ([]byte, error) {
   604  	return git.git("show", fmt.Sprintf("%s:%s", commit, name))
   605  }
   606  
   607  func (git *git) MergeBases(firstCommit, secondCommit string) ([]*Commit, error) {
   608  	output, err := git.git("merge-base", firstCommit, secondCommit)
   609  	if err != nil {
   610  		return nil, err
   611  	}
   612  	ret := []*Commit{}
   613  	for _, hash := range strings.Fields(string(output)) {
   614  		commit, err := git.getCommit(hash)
   615  		if err != nil {
   616  			return nil, err
   617  		}
   618  		ret = append(ret, commit)
   619  	}
   620  	return ret, nil
   621  }