github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/pkg/vcs/vcs.go (about)

     1  // Copyright 2018 syzkaller project authors. All rights reserved.
     2  // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
     3  
     4  // Package vcs provides helper functions for working with various repositories (e.g. git).
     5  package vcs
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"net/mail"
    11  	"regexp"
    12  	"sort"
    13  	"strconv"
    14  	"strings"
    15  	"time"
    16  
    17  	"github.com/google/syzkaller/dashboard/dashapi"
    18  	"github.com/google/syzkaller/pkg/debugtracer"
    19  	"github.com/google/syzkaller/pkg/osutil"
    20  	"github.com/google/syzkaller/pkg/report/crash"
    21  	"github.com/google/syzkaller/sys/targets"
    22  )
    23  
// Repo is the set of operations this package offers on a checked-out source repository.
type Repo interface {
	// Poll checks out the specified repository/branch.
	// This involves fetching/resetting/cloning as necessary to recover from all possible problems.
	// Returns the HEAD commit of the specified branch.
	Poll(repo, branch string) (*Commit, error)

	// CheckoutBranch checks out the specified repository/branch.
	CheckoutBranch(repo, branch string) (*Commit, error)

	// CheckoutCommit checks out the specified repository on the specified commit.
	CheckoutCommit(repo, commit string) (*Commit, error)

	// SwitchCommit checks out the specified commit without fetching.
	SwitchCommit(commit string) (*Commit, error)

	// Commit returns info about the specified commit hash.
	// The commit may be the special value HEAD for the current commit.
	Commit(com string) (*Commit, error)

	// GetCommitByTitle finds commit info by the title. If the commit is not found, nil is returned.
	// Remote is not fetched and only commits reachable from the checked out HEAD are searched
	// (e.g. do CheckoutBranch before).
	GetCommitByTitle(title string) (*Commit, error)

	// GetCommitsByTitles is a batch version of GetCommitByTitle.
	// Returns the list of found commits and the titles of commits that were not found.
	GetCommitsByTitles(titles []string) ([]*Commit, []string, error)

	// ExtractFixTagsFromCommits extracts fixing tags for bugs from git log.
	// Given email = "user@domain.com", it searches for tags of the form "user+tag@domain.com"
	// and returns commits with these tags.
	ExtractFixTagsFromCommits(baseCommit, email string) ([]*Commit, error)

	// ReleaseTag returns the latest release tag that is reachable from the given commit.
	ReleaseTag(commit string) (string, error)

	// Contains returns true if the current tree contains the specified commit.
	// Remote is not fetched and only commits reachable from the checked out HEAD are searched
	// (e.g. do CheckoutBranch before).
	Contains(commit string) (bool, error)

	// LatestCommits lists all latest commit hashes as well as their commit dates.
	// If afterCommit is specified, the output only includes the commits from which afterCommit is reachable.
	// If afterDate is specified, the output only includes the newer commits.
	LatestCommits(afterCommit string, afterDate time.Time) ([]CommitShort, error)

	// Object returns the contents of a git repository object at the particular moment in history.
	Object(name, commit string) ([]byte, error)

	// MergeBases returns good common ancestors of the two commits.
	MergeBases(firstCommit, secondCommit string) ([]*Commit, error)

	// CommitExists checks for the commit presence in the local checkout.
	CommitExists(commit string) (bool, error)

	// PushCommit is used to store the commit in a remote repo.
	PushCommit(repo, commit string) error
}
    82  
// Bisecter may be optionally implemented by Repo.
type Bisecter interface {
	// PrepareBisect can be used for last minute preparations like pulling release tags into
	// the bisected repo, which is required to determine the compiler version to use on linux.
	// Can be an empty function.
	PrepareBisect() error

	// Bisect bisects good..bad commit range against the provided predicate (wrapper around git bisect).
	// The predicate should return an error only if there is no way to proceed
	// (it will abort the process), if possible it should prefer to return BisectSkip.
	// Progress of the process is streamed to the provided trace.
	// Returns the first commit on which the predicate returns BisectBad,
	// or multiple commits if bisection is inconclusive due to BisectSkip.
	Bisect(bad, good string, dt debugtracer.DebugTracer, pred func() (BisectResult, error)) ([]*Commit, error)

	// PreviousReleaseTags returns list of preceding release tags that are reachable from the given commit.
	// If the commit itself has a release tag, this tag is not included.
	PreviousReleaseTags(commit, compilerType string) ([]string, error)

	// IsRelease reports whether the given commit is a release.
	// NOTE(review): presumably this means "the commit carries a release tag" - confirm with implementations.
	IsRelease(commit string) (bool, error)

	// EnvForCommit returns the bisection environment (compiler and kernel config, see BisectEnv)
	// to use for the given commit.
	// NOTE(review): how binDir and backports influence the result is not visible here - confirm
	// with implementations.
	EnvForCommit(defaultCompiler, compilerType, binDir, commit string,
		kernelConfig []byte, backports []BackportCommit) (*BisectEnv, error)
}
   106  
// ConfigMinimizer is implemented by types that can minimize a config.
// Minimize takes the original and baseline configs as raw bytes and a predicate that is invoked
// with a candidate config and classifies it with a BisectResult; it returns the resulting config.
// NOTE(review): the exact relation between original/baseline and the role of types are inferred
// from parameter names only - confirm with implementations.
type ConfigMinimizer interface {
	Minimize(target *targets.Target, original, baseline []byte, types []crash.Type,
		dt debugtracer.DebugTracer, pred func(test []byte) (BisectResult, error)) ([]byte, error)
}
   111  
// Commit holds information about a single commit as returned by Repo methods.
// NOTE(review): field meanings below are inferred from their names and types - confirm
// against the code that fills them in.
type Commit struct {
	Hash       string     // commit hash
	Title      string     // commit title (presumably the first line of the message)
	Author     string     // presumably the author's email address
	AuthorName string     // presumably the author's display name
	Recipients Recipients // people associated with the commit, each marked as To or Cc
	Tags       []string
	Parents    []string  // presumably hashes of the parent commits
	Date       time.Time // presumably the author date, as opposed to CommitDate
	CommitDate time.Time
	Patch      []byte
}
   124  
// CommitShort is the abbreviated commit description returned by Repo.LatestCommits:
// just the hash and the commit date.
type CommitShort struct {
	Hash       string
	CommitDate time.Time
}
   129  
   130  type RecipientType int
   131  
   132  const (
   133  	To RecipientType = iota
   134  	Cc
   135  )
   136  
   137  func (t RecipientType) String() string {
   138  	return [...]string{"To", "Cc"}[t]
   139  }
   140  
   141  type RecipientInfo struct {
   142  	Address mail.Address
   143  	Type    RecipientType
   144  }
   145  
   146  type Recipients []RecipientInfo
   147  
   148  func (r Recipients) GetEmails(filter RecipientType) []string {
   149  	emails := []string{}
   150  	for _, user := range r {
   151  		if user.Type == filter {
   152  			emails = append(emails, user.Address.Address)
   153  		}
   154  	}
   155  	sort.Strings(emails)
   156  	return emails
   157  }
   158  
   159  func NewRecipients(emails []string, t RecipientType) Recipients {
   160  	r := Recipients{}
   161  	for _, e := range emails {
   162  		r = append(r, RecipientInfo{mail.Address{Address: e}, t})
   163  	}
   164  	sort.Sort(r)
   165  	return r
   166  }
   167  
   168  func (r Recipients) Len() int           { return len(r) }
   169  func (r Recipients) Less(i, j int) bool { return r[i].Address.Address < r[j].Address.Address }
   170  func (r Recipients) Swap(i, j int)      { r[i], r[j] = r[j], r[i] }
   171  
   172  func (r Recipients) ToDash() dashapi.Recipients {
   173  	d := dashapi.Recipients{}
   174  	for _, user := range r {
   175  		d = append(d, dashapi.RecipientInfo{Address: user.Address, Type: dashapi.RecipientType(user.Type)})
   176  	}
   177  	return d
   178  }
   179  
// BisectResult is the verdict a bisection predicate gives for the commit under test
// (see Bisecter.Bisect).
type BisectResult int

const (
	// BisectBad is the zero value.
	BisectBad BisectResult = iota
	BisectGood
	// BisectSkip should be preferred by predicates over returning an error when possible.
	BisectSkip
)
   187  
// BisectEnv is the environment returned by Bisecter.EnvForCommit:
// the compiler and the kernel config to use.
type BisectEnv struct {
	Compiler     string
	KernelConfig []byte
}
   192  
// RepoOpt is an option that can be passed to the repository constructors in this package.
type RepoOpt int

const (
	// OptPrecious is intended for command-line tools that work with a user-provided repo.
	// Such repo won't be re-created to recover from errors, but rather return errors.
	// If this option is not specified, the repo can be re-created from scratch to recover from any errors.
	OptPrecious RepoOpt = iota
	// Don't use sandboxing suitable for pkg/build.
	OptDontSandbox
)
   203  
   204  func NewRepo(os, vmType, dir string, opts ...RepoOpt) (Repo, error) {
   205  	switch os {
   206  	case targets.Linux:
   207  		if vmType == targets.Starnix {
   208  			return newFuchsia(dir, opts), nil
   209  		}
   210  		return newLinux(dir, opts, vmType), nil
   211  	case targets.Fuchsia:
   212  		return newFuchsia(dir, opts), nil
   213  	case targets.OpenBSD:
   214  		return newGitRepo(dir, nil, opts), nil
   215  	case targets.NetBSD:
   216  		return newGitRepo(dir, nil, opts), nil
   217  	case targets.FreeBSD:
   218  		return newGitRepo(dir, nil, opts), nil
   219  	case targets.TestOS:
   220  		return newTestos(dir, opts), nil
   221  	}
   222  	return nil, fmt.Errorf("vcs is unsupported for %v", os)
   223  }
   224  
   225  func NewSyzkallerRepo(dir string, opts ...RepoOpt) Repo {
   226  	git := newGitRepo(dir, nil, append(opts, OptDontSandbox))
   227  	return git
   228  }
   229  
   230  func NewLKMLRepo(dir string) Repo {
   231  	return newGitRepo(dir, nil, []RepoOpt{OptDontSandbox})
   232  }
   233  
   234  func Patch(dir string, patch []byte) error {
   235  	// Do --dry-run first to not mess with partially consistent state.
   236  	cmd := osutil.Command("patch", "-p1", "--force", "--ignore-whitespace", "--dry-run")
   237  	if err := osutil.Sandbox(cmd, true, true); err != nil {
   238  		return err
   239  	}
   240  	cmd.Stdin = bytes.NewReader(patch)
   241  	cmd.Dir = dir
   242  	if output, err := cmd.CombinedOutput(); err != nil {
   243  		// If it reverses clean, then it's already applied
   244  		// (seems to be the easiest way to detect it).
   245  		cmd = osutil.Command("patch", "-p1", "--force", "--ignore-whitespace", "--reverse", "--dry-run")
   246  		if err := osutil.Sandbox(cmd, true, true); err != nil {
   247  			return err
   248  		}
   249  		cmd.Stdin = bytes.NewReader(patch)
   250  		cmd.Dir = dir
   251  		if _, err := cmd.CombinedOutput(); err == nil {
   252  			return fmt.Errorf("patch is already applied")
   253  		}
   254  		return fmt.Errorf("failed to apply patch:\n%s", output)
   255  	}
   256  	// Now apply for real.
   257  	cmd = osutil.Command("patch", "-p1", "--force", "--ignore-whitespace")
   258  	if err := osutil.Sandbox(cmd, true, true); err != nil {
   259  		return err
   260  	}
   261  	cmd.Stdin = bytes.NewReader(patch)
   262  	cmd.Dir = dir
   263  	if output, err := cmd.CombinedOutput(); err != nil {
   264  		return fmt.Errorf("failed to apply patch after dry run:\n%s", output)
   265  	}
   266  	return nil
   267  }
   268  
   269  // CheckRepoAddress does a best-effort approximate check of a git repo address.
   270  func CheckRepoAddress(repo string) bool {
   271  	return gitLocalRepoRe.MatchString(repo) ||
   272  		gitRemoteRepoRe.MatchString(repo) ||
   273  		gitSSHRepoRe.MatchString(repo)
   274  }
   275  
   276  // CheckBranch does a best-effort approximate check of a git branch name.
   277  func CheckBranch(branch string) bool {
   278  	return gitBranchRe.MatchString(branch)
   279  }
   280  
   281  func CheckCommitHash(hash string) bool {
   282  	return gitHashRe.MatchString(hash)
   283  }
   284  
   285  func ParseReleaseTag(tag string) (v1, v2, rc, v3 int) {
   286  	invalid := func() {
   287  		v1, v2, rc, v3 = -1, -1, -1, -1
   288  	}
   289  	invalid()
   290  	matches := releaseTagRe.FindStringSubmatch(tag)
   291  	if matches == nil {
   292  		return
   293  	}
   294  	for ptr, idx := range map[*int]int{
   295  		&v1: 1, &v2: 2, &rc: 3, &v3: 4,
   296  	} {
   297  		if matches[idx] == "" {
   298  			continue
   299  		}
   300  		var err error
   301  		*ptr, err = strconv.Atoi(matches[idx])
   302  		if err != nil {
   303  			invalid()
   304  			return
   305  		}
   306  	}
   307  	return
   308  }
   309  
   310  func runSandboxed(dir, command string, args ...string) ([]byte, error) {
   311  	cmd := osutil.Command(command, args...)
   312  	cmd.Dir = dir
   313  	if err := osutil.Sandbox(cmd, true, false); err != nil {
   314  		return nil, err
   315  	}
   316  	return osutil.Run(time.Hour, cmd)
   317  }
   318  
   319  var (
   320  	// nolint: lll
   321  	gitLocalRepoRe = regexp.MustCompile(`^file:///[a-zA-Z0-9-_./~]+(/)?$`)
   322  	// nolint: lll
   323  	gitRemoteRepoRe = regexp.MustCompile(`^(git|ssh|http|https|ftp|ftps|sso)://[a-zA-Z0-9-_.]+(:[0-9]+)?(/[a-zA-Z0-9-_./~]+)?(/)?$`)
   324  	// nolint: lll
   325  	gitSSHRepoRe = regexp.MustCompile(`^(git|ssh|http|https|ftp|ftps|sso)@[a-zA-Z0-9-_.]+(:[a-zA-Z0-9-_]+)?(/[a-zA-Z0-9-_./~]+)?(/)?$`)
   326  	gitBranchRe  = regexp.MustCompile("^[a-zA-Z0-9-_/.]{2,200}$")
   327  	gitHashRe    = regexp.MustCompile("^[a-f0-9]{8,40}$")
   328  	releaseTagRe = regexp.MustCompile(`^v([0-9]+).([0-9]+)(?:-rc([0-9]+))?(?:\.([0-9]+))?$`)
   329  	// CC: is intentionally not on this list, see #1441.
   330  	ccRes = []*regexp.Regexp{
   331  		regexp.MustCompile(`^Reviewed\-.*: (.*)$`),
   332  		regexp.MustCompile(`^[A-Za-z-]+\-and\-[Rr]eviewed\-.*: (.*)$`),
   333  		regexp.MustCompile(`^Acked\-.*: (.*)$`),
   334  		regexp.MustCompile(`^[A-Za-z-]+\-and\-[Aa]cked\-.*: (.*)$`),
   335  		regexp.MustCompile(`^Tested\-.*: (.*)$`),
   336  		regexp.MustCompile(`^[A-Za-z-]+\-and\-[Tt]ested\-.*: (.*)$`),
   337  		regexp.MustCompile(`^Signed-off-by: (.*)$`),
   338  	}
   339  )
   340  
   341  // CanonicalizeCommit returns commit title that can be used when checking
   342  // if a particular commit is present in a git tree.
   343  // Some trees add prefixes to commit titles during backporting,
   344  // so we want e.g. commit "foo bar" match "BACKPORT: foo bar".
   345  func CanonicalizeCommit(title string) string {
   346  	for _, prefix := range commitPrefixes {
   347  		if strings.HasPrefix(title, prefix) {
   348  			title = title[len(prefix):]
   349  			break
   350  		}
   351  	}
   352  	return strings.TrimSpace(title)
   353  }
   354  
   355  var commitPrefixes = []string{
   356  	"UPSTREAM:",
   357  	"CHROMIUM:",
   358  	"FROMLIST:",
   359  	"BACKPORT:",
   360  	"FROMGIT:",
   361  	"net-backports:",
   362  }
   363  
// SyzkallerRepo is the URL of the syzkaller repository on GitHub.
const SyzkallerRepo = "https://github.com/google/syzkaller"

// HEAD is the special commit name that denotes the current commit (see Repo.Commit).
const HEAD = "HEAD"
   367  
   368  func CommitLink(url, hash string) string {
   369  	return link(url, hash, "", 0, 0)
   370  }
   371  
   372  // Used externally - do not remove.
   373  func TreeLink(url, hash string) string {
   374  	return link(url, hash, "", 0, 1)
   375  }
   376  
   377  func LogLink(url, hash string) string {
   378  	return link(url, hash, "", 0, 2)
   379  }
   380  
   381  func FileLink(url, hash, file string, line int) string {
   382  	return link(url, hash, file, line, 3)
   383  }
   384  
   385  // nolint: goconst
   386  func link(url, hash, file string, line, typ int) string {
   387  	if url == "" || hash == "" {
   388  		return ""
   389  	}
   390  	switch url {
   391  	case "https://fuchsia.googlesource.com":
   392  		// We collect hashes from the fuchsia repo.
   393  		return link(url+"/fuchsia", hash, file, line, typ)
   394  	}
   395  	if strings.HasPrefix(url, "https://github.com/") {
   396  		url = strings.TrimSuffix(url, ".git")
   397  		switch typ {
   398  		case 1:
   399  			return url + "/tree/" + hash
   400  		case 2:
   401  			return url + "/commits/" + hash
   402  		case 3:
   403  			return url + "/blob/" + hash + "/" + file + "#L" + fmt.Sprint(line)
   404  		default:
   405  			return url + "/commit/" + hash
   406  		}
   407  	}
   408  	if strings.HasPrefix(url, "https://git.kernel.org/pub/scm/") ||
   409  		strings.HasPrefix(url, "git://git.kernel.org/pub/scm/") {
   410  		url = strings.TrimPrefix(url, "git")
   411  		url = strings.TrimPrefix(url, "https")
   412  		url = "https" + url
   413  		switch typ {
   414  		case 1:
   415  			return url + "/tree/?id=" + hash
   416  		case 2:
   417  			return url + "/log/?id=" + hash
   418  		case 3:
   419  			return url + "/tree/" + file + "?id=" + hash + "#n" + fmt.Sprint(line)
   420  		default:
   421  			return url + "/commit/?id=" + hash
   422  		}
   423  	}
   424  	for _, cgitHost := range []string{"git.kernel.dk", "git.breakpoint.cc"} {
   425  		if strings.HasPrefix(url, "https://"+cgitHost) ||
   426  			strings.HasPrefix(url, "git://"+cgitHost) {
   427  			url = strings.TrimPrefix(strings.TrimPrefix(url, "git://"), "https://")
   428  			url = strings.TrimPrefix(url, cgitHost)
   429  			url = "https://" + cgitHost + "/cgit" + url
   430  			switch typ {
   431  			case 1:
   432  				return url + "/tree/?id=" + hash
   433  			case 2:
   434  				return url + "/log/?id=" + hash
   435  			case 3:
   436  				return url + "/tree/" + file + "?id=" + hash + "#n" + fmt.Sprint(line)
   437  			default:
   438  				return url + "/commit/?id=" + hash
   439  			}
   440  		}
   441  	}
   442  	if strings.HasPrefix(url, "https://") && strings.Contains(url, ".googlesource.com") {
   443  		switch typ {
   444  		case 1:
   445  			return url + "/+/" + hash + "/"
   446  		case 2:
   447  			return url + "/+log/" + hash
   448  		case 3:
   449  			return url + "/+/" + hash + "/" + file + "#" + fmt.Sprint(line)
   450  		default:
   451  			return url + "/+/" + hash + "^!"
   452  		}
   453  	}
   454  	return ""
   455  }