github.com/google/syzkaller@v0.0.0-20240517125934-c0f1611a36d6/pkg/vcs/vcs.go (about)

     1  // Copyright 2018 syzkaller project authors. All rights reserved.
     2  // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
     3  
     4  // Package vcs provides helper functions for working with various repositories (e.g. git).
     5  package vcs
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"net/mail"
    11  	"regexp"
    12  	"sort"
    13  	"strconv"
    14  	"strings"
    15  	"time"
    16  
    17  	"github.com/google/syzkaller/dashboard/dashapi"
    18  	"github.com/google/syzkaller/pkg/debugtracer"
    19  	"github.com/google/syzkaller/pkg/osutil"
    20  	"github.com/google/syzkaller/pkg/report/crash"
    21  	"github.com/google/syzkaller/sys/targets"
    22  )
    23  
// Repo describes the operations available on a checked out source repository.
// Instances are obtained from NewRepo, NewSyzkallerRepo or NewLKMLRepo.
type Repo interface {
	// Poll checks out the specified repository/branch.
	// This involves fetching/resetting/cloning as necessary to recover from all possible problems.
	// Returns the HEAD commit of the specified branch.
	Poll(repo, branch string) (*Commit, error)

	// CheckoutBranch checks out the specified repository/branch.
	CheckoutBranch(repo, branch string) (*Commit, error)

	// CheckoutCommit checks out the specified repository on the specified commit.
	CheckoutCommit(repo, commit string) (*Commit, error)

	// SwitchCommit checks out the specified commit without fetching.
	SwitchCommit(commit string) (*Commit, error)

	// HeadCommit returns info about the HEAD commit of the current branch of git repository.
	HeadCommit() (*Commit, error)

	// GetCommitByTitle finds commit info by the title. If the commit is not found, nil is returned.
	// Remote is not fetched and only commits reachable from the checked out HEAD are searched
	// (e.g. do CheckoutBranch before).
	GetCommitByTitle(title string) (*Commit, error)

	// GetCommitsByTitles is a batch version of GetCommitByTitle.
	// Returns list of commits and titles of commits that are not found.
	GetCommitsByTitles(titles []string) ([]*Commit, []string, error)

	// ExtractFixTagsFromCommits extracts fixing tags for bugs from git log.
	// Given email = "user@domain.com", it searches for tags of the form "user+tag@domain.com"
	// and returns commits with these tags.
	ExtractFixTagsFromCommits(baseCommit, email string) ([]*Commit, error)

	// ReleaseTag returns the latest release tag that is reachable from the given commit.
	ReleaseTag(commit string) (string, error)

	// Contains returns true if the current tree contains the specified commit.
	// Remote is not fetched and only commits reachable from the checked out HEAD are searched
	// (e.g. do CheckoutBranch before).
	Contains(commit string) (bool, error)

	// ListCommitHashes lists all commit hashes reachable from baseCommit.
	ListCommitHashes(baseCommit string) ([]string, error)

	// Object returns the contents of a git repository object at the particular moment in history.
	Object(name, commit string) ([]byte, error)

	// MergeBases returns good common ancestors of the two commits.
	MergeBases(firstCommit, secondCommit string) ([]*Commit, error)
}
    73  
    74  // Bisecter may be optionally implemented by Repo.
type Bisecter interface {
	// PrepareBisect can be used for last minute preparations like pulling release tags into the
	// bisected repo, which is required to determine the compiler version to use on linux.
	// Can be an empty function.
	PrepareBisect() error

	// Bisect bisects good..bad commit range against the provided predicate (wrapper around git bisect).
	// The predicate should return an error only if there is no way to proceed
	// (it will abort the process), if possible it should prefer to return BisectSkip.
	// Progress of the process is streamed to the provided trace.
	// Returns the first commit on which the predicate returns BisectBad,
	// or multiple commits if bisection is inconclusive due to BisectSkip.
	Bisect(bad, good string, dt debugtracer.DebugTracer, pred func() (BisectResult, error)) ([]*Commit, error)

	// PreviousReleaseTags returns list of preceding release tags that are reachable from the given commit.
	// If the commit itself has a release tag, this tag is not included.
	PreviousReleaseTags(commit, compilerType string) ([]string, error)

	// IsRelease presumably reports whether the given commit is a tagged release;
	// NOTE(review): exact criteria are defined by the implementations — confirm there.
	IsRelease(commit string) (bool, error)

	// EnvForCommit returns the BisectEnv to use for the given commit.
	// NOTE(review): how defaultCompiler, compilerType, binDir, kernelConfig and backports
	// influence the result is implementation-specific — confirm against the implementations.
	EnvForCommit(defaultCompiler, compilerType, binDir, commit string,
		kernelConfig []byte, backports []BackportCommit) (*BisectEnv, error)
}
    97  
// ConfigMinimizer may be implemented by repositories that support config minimization.
type ConfigMinimizer interface {
	// Minimize produces a config from the original and baseline configs, consulting pred
	// for candidate configs and reporting progress to dt.
	// NOTE(review): the minimization strategy and the role of types are not visible here —
	// confirm against the implementations.
	Minimize(target *targets.Target, original, baseline []byte, types []crash.Type,
		dt debugtracer.DebugTracer, pred func(test []byte) (BisectResult, error)) ([]byte, error)
}
   102  
// Commit holds information about a single repository commit.
type Commit struct {
	Hash       string
	Title      string
	Author     string // presumably the author's email address — confirm against the code that fills it
	AuthorName string
	Recipients Recipients
	Tags       []string
	Parents    []string
	Date       time.Time // presumably the author date — confirm against the code that fills it
	CommitDate time.Time // presumably the committer date — confirm against the code that fills it
}
   114  
   115  type RecipientType int
   116  
   117  const (
   118  	To RecipientType = iota
   119  	Cc
   120  )
   121  
   122  func (t RecipientType) String() string {
   123  	return [...]string{"To", "Cc"}[t]
   124  }
   125  
// RecipientInfo pairs an email address with the way it should be addressed (To or Cc).
type RecipientInfo struct {
	Address mail.Address
	Type    RecipientType
}
   130  
   131  type Recipients []RecipientInfo
   132  
   133  func (r Recipients) GetEmails(filter RecipientType) []string {
   134  	emails := []string{}
   135  	for _, user := range r {
   136  		if user.Type == filter {
   137  			emails = append(emails, user.Address.Address)
   138  		}
   139  	}
   140  	sort.Strings(emails)
   141  	return emails
   142  }
   143  
   144  func NewRecipients(emails []string, t RecipientType) Recipients {
   145  	r := Recipients{}
   146  	for _, e := range emails {
   147  		r = append(r, RecipientInfo{mail.Address{Address: e}, t})
   148  	}
   149  	sort.Sort(r)
   150  	return r
   151  }
   152  
   153  func (r Recipients) Len() int           { return len(r) }
   154  func (r Recipients) Less(i, j int) bool { return r[i].Address.Address < r[j].Address.Address }
   155  func (r Recipients) Swap(i, j int)      { r[i], r[j] = r[j], r[i] }
   156  
   157  func (r Recipients) ToDash() dashapi.Recipients {
   158  	d := dashapi.Recipients{}
   159  	for _, user := range r {
   160  		d = append(d, dashapi.RecipientInfo{Address: user.Address, Type: dashapi.RecipientType(user.Type)})
   161  	}
   162  	return d
   163  }
   164  
// BisectResult is the verdict returned by a bisection predicate (see Bisecter.Bisect).
type BisectResult int

const (
	BisectBad BisectResult = iota
	BisectGood
	// BisectSkip is the preferred result when the predicate cannot decide
	// (see the Bisecter.Bisect documentation).
	BisectSkip
)
   172  
// BisectEnv is the result of Bisecter.EnvForCommit.
// NOTE(review): presumably Compiler is the compiler to build the commit with and
// KernelConfig is the kernel config to use for it — confirm against the implementations.
type BisectEnv struct {
	Compiler     string
	KernelConfig []byte
}
   177  
// RepoOpt is an option that can be passed to the repository constructors (e.g. NewRepo).
type RepoOpt int

const (
	// OptPrecious is intended for command-line tools that work with a user-provided repo.
	// Such repo won't be re-created to recover from errors, but rather return errors.
	// If this option is not specified, the repo can be re-created from scratch to recover from any errors.
	OptPrecious RepoOpt = iota
	// Don't use sandboxing suitable for pkg/build.
	OptDontSandbox
)
   188  
   189  func NewRepo(os, vmType, dir string, opts ...RepoOpt) (Repo, error) {
   190  	switch os {
   191  	case targets.Linux:
   192  		return newLinux(dir, opts, vmType), nil
   193  	case targets.Fuchsia:
   194  		return newFuchsia(dir, opts), nil
   195  	case targets.OpenBSD:
   196  		return newGit(dir, nil, opts), nil
   197  	case targets.NetBSD:
   198  		return newGit(dir, nil, opts), nil
   199  	case targets.FreeBSD:
   200  		return newGit(dir, nil, opts), nil
   201  	case targets.TestOS:
   202  		return newTestos(dir, opts), nil
   203  	}
   204  	return nil, fmt.Errorf("vcs is unsupported for %v", os)
   205  }
   206  
   207  func NewSyzkallerRepo(dir string, opts ...RepoOpt) Repo {
   208  	git := newGit(dir, nil, append(opts, OptDontSandbox))
   209  	return git
   210  }
   211  
   212  func NewLKMLRepo(dir string) Repo {
   213  	return newGit(dir, nil, []RepoOpt{OptDontSandbox})
   214  }
   215  
   216  func Patch(dir string, patch []byte) error {
   217  	// Do --dry-run first to not mess with partially consistent state.
   218  	cmd := osutil.Command("patch", "-p1", "--force", "--ignore-whitespace", "--dry-run")
   219  	if err := osutil.Sandbox(cmd, true, true); err != nil {
   220  		return err
   221  	}
   222  	cmd.Stdin = bytes.NewReader(patch)
   223  	cmd.Dir = dir
   224  	if output, err := cmd.CombinedOutput(); err != nil {
   225  		// If it reverses clean, then it's already applied
   226  		// (seems to be the easiest way to detect it).
   227  		cmd = osutil.Command("patch", "-p1", "--force", "--ignore-whitespace", "--reverse", "--dry-run")
   228  		if err := osutil.Sandbox(cmd, true, true); err != nil {
   229  			return err
   230  		}
   231  		cmd.Stdin = bytes.NewReader(patch)
   232  		cmd.Dir = dir
   233  		if _, err := cmd.CombinedOutput(); err == nil {
   234  			return fmt.Errorf("patch is already applied")
   235  		}
   236  		return fmt.Errorf("failed to apply patch:\n%s", output)
   237  	}
   238  	// Now apply for real.
   239  	cmd = osutil.Command("patch", "-p1", "--force", "--ignore-whitespace")
   240  	if err := osutil.Sandbox(cmd, true, true); err != nil {
   241  		return err
   242  	}
   243  	cmd.Stdin = bytes.NewReader(patch)
   244  	cmd.Dir = dir
   245  	if output, err := cmd.CombinedOutput(); err != nil {
   246  		return fmt.Errorf("failed to apply patch after dry run:\n%s", output)
   247  	}
   248  	return nil
   249  }
   250  
   251  // CheckRepoAddress does a best-effort approximate check of a git repo address.
   252  func CheckRepoAddress(repo string) bool {
   253  	return gitLocalRepoRe.MatchString(repo) ||
   254  		gitRemoteRepoRe.MatchString(repo) ||
   255  		gitSSHRepoRe.MatchString(repo)
   256  }
   257  
   258  // CheckBranch does a best-effort approximate check of a git branch name.
   259  func CheckBranch(branch string) bool {
   260  	return gitBranchRe.MatchString(branch)
   261  }
   262  
   263  func CheckCommitHash(hash string) bool {
   264  	return gitHashRe.MatchString(hash)
   265  }
   266  
   267  func ParseReleaseTag(tag string) (v1, v2, rc, v3 int) {
   268  	invalid := func() {
   269  		v1, v2, rc, v3 = -1, -1, -1, -1
   270  	}
   271  	invalid()
   272  	matches := releaseTagRe.FindStringSubmatch(tag)
   273  	if matches == nil {
   274  		return
   275  	}
   276  	for ptr, idx := range map[*int]int{
   277  		&v1: 1, &v2: 2, &rc: 3, &v3: 4,
   278  	} {
   279  		if matches[idx] == "" {
   280  			continue
   281  		}
   282  		var err error
   283  		*ptr, err = strconv.Atoi(matches[idx])
   284  		if err != nil {
   285  			invalid()
   286  			return
   287  		}
   288  	}
   289  	return
   290  }
   291  
   292  func runSandboxed(dir, command string, args ...string) ([]byte, error) {
   293  	cmd := osutil.Command(command, args...)
   294  	cmd.Dir = dir
   295  	if err := osutil.Sandbox(cmd, true, false); err != nil {
   296  		return nil, err
   297  	}
   298  	return osutil.Run(time.Hour, cmd)
   299  }
   300  
   301  var (
   302  	// nolint: lll
   303  	gitLocalRepoRe = regexp.MustCompile(`^file:///[a-zA-Z0-9-_./~]+(/)?$`)
   304  	// nolint: lll
   305  	gitRemoteRepoRe = regexp.MustCompile(`^(git|ssh|http|https|ftp|ftps|sso)://[a-zA-Z0-9-_.]+(:[0-9]+)?(/[a-zA-Z0-9-_./~]+)?(/)?$`)
   306  	// nolint: lll
   307  	gitSSHRepoRe = regexp.MustCompile(`^(git|ssh|http|https|ftp|ftps|sso)@[a-zA-Z0-9-_.]+(:[a-zA-Z0-9-_]+)?(/[a-zA-Z0-9-_./~]+)?(/)?$`)
   308  	gitBranchRe  = regexp.MustCompile("^[a-zA-Z0-9-_/.]{2,200}$")
   309  	gitHashRe    = regexp.MustCompile("^[a-f0-9]{8,40}$")
   310  	releaseTagRe = regexp.MustCompile(`^v([0-9]+).([0-9]+)(?:-rc([0-9]+))?(?:\.([0-9]+))?$`)
   311  	// CC: is intentionally not on this list, see #1441.
   312  	ccRes = []*regexp.Regexp{
   313  		regexp.MustCompile(`^Reviewed\-.*: (.*)$`),
   314  		regexp.MustCompile(`^[A-Za-z-]+\-and\-[Rr]eviewed\-.*: (.*)$`),
   315  		regexp.MustCompile(`^Acked\-.*: (.*)$`),
   316  		regexp.MustCompile(`^[A-Za-z-]+\-and\-[Aa]cked\-.*: (.*)$`),
   317  		regexp.MustCompile(`^Tested\-.*: (.*)$`),
   318  		regexp.MustCompile(`^[A-Za-z-]+\-and\-[Tt]ested\-.*: (.*)$`),
   319  		regexp.MustCompile(`^Signed-off-by: (.*)$`),
   320  	}
   321  )
   322  
   323  // CanonicalizeCommit returns commit title that can be used when checking
   324  // if a particular commit is present in a git tree.
   325  // Some trees add prefixes to commit titles during backporting,
   326  // so we want e.g. commit "foo bar" match "BACKPORT: foo bar".
   327  func CanonicalizeCommit(title string) string {
   328  	for _, prefix := range commitPrefixes {
   329  		if strings.HasPrefix(title, prefix) {
   330  			title = title[len(prefix):]
   331  			break
   332  		}
   333  	}
   334  	return strings.TrimSpace(title)
   335  }
   336  
   337  var commitPrefixes = []string{
   338  	"UPSTREAM:",
   339  	"CHROMIUM:",
   340  	"FROMLIST:",
   341  	"BACKPORT:",
   342  	"FROMGIT:",
   343  	"net-backports:",
   344  }
   345  
// SyzkallerRepo is the address of the syzkaller repository on GitHub.
const SyzkallerRepo = "https://github.com/google/syzkaller"

// HEAD is the string "HEAD" as a named constant.
const HEAD = "HEAD"
   349  
   350  func CommitLink(url, hash string) string {
   351  	return link(url, hash, "", 0, 0)
   352  }
   353  
   354  func TreeLink(url, hash string) string {
   355  	return link(url, hash, "", 0, 1)
   356  }
   357  
   358  func LogLink(url, hash string) string {
   359  	return link(url, hash, "", 0, 2)
   360  }
   361  
   362  func FileLink(url, hash, file string, line int) string {
   363  	return link(url, hash, file, line, 3)
   364  }
   365  
   366  // nolint: goconst
   367  func link(url, hash, file string, line, typ int) string {
   368  	if url == "" || hash == "" {
   369  		return ""
   370  	}
   371  	switch url {
   372  	case "https://fuchsia.googlesource.com":
   373  		// We collect hashes from the fuchsia repo.
   374  		return link(url+"/fuchsia", hash, file, line, typ)
   375  	}
   376  	if strings.HasPrefix(url, "https://github.com/") {
   377  		url = strings.TrimSuffix(url, ".git")
   378  		switch typ {
   379  		case 1:
   380  			return url + "/tree/" + hash
   381  		case 2:
   382  			return url + "/commits/" + hash
   383  		case 3:
   384  			return url + "/blob/" + hash + "/" + file + "#L" + fmt.Sprint(line)
   385  		default:
   386  			return url + "/commit/" + hash
   387  		}
   388  	}
   389  	if strings.HasPrefix(url, "https://git.kernel.org/pub/scm/") ||
   390  		strings.HasPrefix(url, "git://git.kernel.org/pub/scm/") {
   391  		url = strings.TrimPrefix(url, "git")
   392  		url = strings.TrimPrefix(url, "https")
   393  		url = "https" + url
   394  		switch typ {
   395  		case 1:
   396  			return url + "/tree/?id=" + hash
   397  		case 2:
   398  			return url + "/log/?id=" + hash
   399  		case 3:
   400  			return url + "/tree/" + file + "?id=" + hash + "#n" + fmt.Sprint(line)
   401  		default:
   402  			return url + "/commit/?id=" + hash
   403  		}
   404  	}
   405  	for _, cgitHost := range []string{"git.kernel.dk", "git.breakpoint.cc"} {
   406  		if strings.HasPrefix(url, "https://"+cgitHost) ||
   407  			strings.HasPrefix(url, "git://"+cgitHost) {
   408  			url = strings.TrimPrefix(strings.TrimPrefix(url, "git://"), "https://")
   409  			url = strings.TrimPrefix(url, cgitHost)
   410  			url = "https://" + cgitHost + "/cgit" + url
   411  			switch typ {
   412  			case 1:
   413  				return url + "/tree/?id=" + hash
   414  			case 2:
   415  				return url + "/log/?id=" + hash
   416  			case 3:
   417  				return url + "/tree/" + file + "?id=" + hash + "#n" + fmt.Sprint(line)
   418  			default:
   419  				return url + "/commit/?id=" + hash
   420  			}
   421  		}
   422  	}
   423  	if strings.HasPrefix(url, "https://") && strings.Contains(url, ".googlesource.com") {
   424  		switch typ {
   425  		case 1:
   426  			return url + "/+/" + hash + "/"
   427  		case 2:
   428  			return url + "/+log/" + hash
   429  		case 3:
   430  			return url + "/+/" + hash + "/" + file + "#" + fmt.Sprint(line)
   431  		default:
   432  			return url + "/+/" + hash + "^!"
   433  		}
   434  	}
   435  	return ""
   436  }