github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/pkg/vcs/vcs.go (about) 1 // Copyright 2018 syzkaller project authors. All rights reserved. 2 // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 3 4 // Package vcs provides helper functions for working with various repositories (e.g. git). 5 package vcs 6 7 import ( 8 "bytes" 9 "fmt" 10 "net/mail" 11 "regexp" 12 "sort" 13 "strconv" 14 "strings" 15 "time" 16 17 "github.com/google/syzkaller/dashboard/dashapi" 18 "github.com/google/syzkaller/pkg/debugtracer" 19 "github.com/google/syzkaller/pkg/osutil" 20 "github.com/google/syzkaller/pkg/report/crash" 21 "github.com/google/syzkaller/sys/targets" 22 ) 23 24 type Repo interface { 25 // Poll checkouts the specified repository/branch. 26 // This involves fetching/resetting/cloning as necessary to recover from all possible problems. 27 // Returns hash of the HEAD commit in the specified branch. 28 Poll(repo, branch string) (*Commit, error) 29 30 // CheckoutBranch checkouts the specified repository/branch. 31 CheckoutBranch(repo, branch string) (*Commit, error) 32 33 // CheckoutCommit checkouts the specified repository on the specified commit. 34 CheckoutCommit(repo, commit string) (*Commit, error) 35 36 // SwitchCommit checkouts the specified commit without fetching. 37 SwitchCommit(commit string) (*Commit, error) 38 39 // Commit returns info about the specified commit hash. 40 // The commit may be the special value HEAD for the current commit. 41 Commit(com string) (*Commit, error) 42 43 // GetCommitByTitle finds commit info by the title. If the commit is not found, nil is returned. 44 // Remote is not fetched and only commits reachable from the checked out HEAD are searched 45 // (e.g. do CheckoutBranch before). 46 GetCommitByTitle(title string) (*Commit, error) 47 48 // GetCommitsByTitles is a batch version of GetCommitByTitle. 49 // Returns list of commits and titles of commits that are not found. 
50 GetCommitsByTitles(titles []string) ([]*Commit, []string, error) 51 52 // ExtractFixTagsFromCommits extracts fixing tags for bugs from git log. 53 // Given email = "user@domain.com", it searches for tags of the form "user+tag@domain.com" 54 // and returns commits with these tags. 55 ExtractFixTagsFromCommits(baseCommit, email string) ([]*Commit, error) 56 57 // ReleaseTag returns the latest release tag that is reachable from the given commit. 58 ReleaseTag(commit string) (string, error) 59 60 // Returns true if the current tree contains the specified commit. 61 // Remote is not fetched and only commits reachable from the checked out HEAD are searched 62 // (e.g. do CheckoutBranch before). 63 Contains(commit string) (bool, error) 64 65 // LatestCommits lists all latest commit hashes well as their commit dates. 66 // If afterCommit is specified, the output only includes the commits from which afterCommit is reachable. 67 // If afterDate is specified, the output only includes the newe commits. 68 LatestCommits(afterCommit string, afterDate time.Time) ([]CommitShort, error) 69 70 // Object returns the contents of a git repository object at the particular moment in history. 71 Object(name, commit string) ([]byte, error) 72 73 // MergeBases returns good common ancestors of the two commits. 74 MergeBases(firstCommit, secondCommit string) ([]*Commit, error) 75 76 // CommitExists check for the commit presence in local checkout. 77 CommitExists(commit string) (bool, error) 78 79 // PushCommit is used to store commit in remote repo. 80 PushCommit(repo, commit string) error 81 } 82 83 // Bisecter may be optionally implemented by Repo. 84 type Bisecter interface { 85 // Can be used for last minute preparations like pulling release tags into the bisected repo, which 86 // is required to determin the compiler version to use on linux. Can be an empty function. 
87 PrepareBisect() error 88 89 // Bisect bisects good..bad commit range against the provided predicate (wrapper around git bisect). 90 // The predicate should return an error only if there is no way to proceed 91 // (it will abort the process), if possible it should prefer to return BisectSkip. 92 // Progress of the process is streamed to the provided trace. 93 // Returns the first commit on which the predicate returns BisectBad, 94 // or multiple commits if bisection is inconclusive due to BisectSkip. 95 Bisect(bad, good string, dt debugtracer.DebugTracer, pred func() (BisectResult, error)) ([]*Commit, error) 96 97 // PreviousReleaseTags returns list of preceding release tags that are reachable from the given commit. 98 // If the commit itself has a release tag, this tag is not included. 99 PreviousReleaseTags(commit, compilerType string) ([]string, error) 100 101 IsRelease(commit string) (bool, error) 102 103 EnvForCommit(defaultCompiler, compilerType, binDir, commit string, 104 kernelConfig []byte, backports []BackportCommit) (*BisectEnv, error) 105 } 106 107 type ConfigMinimizer interface { 108 Minimize(target *targets.Target, original, baseline []byte, types []crash.Type, 109 dt debugtracer.DebugTracer, pred func(test []byte) (BisectResult, error)) ([]byte, error) 110 } 111 112 type Commit struct { 113 Hash string 114 Title string 115 Author string 116 AuthorName string 117 Recipients Recipients 118 Tags []string 119 Parents []string 120 Date time.Time 121 CommitDate time.Time 122 Patch []byte 123 } 124 125 type CommitShort struct { 126 Hash string 127 CommitDate time.Time 128 } 129 130 type RecipientType int 131 132 const ( 133 To RecipientType = iota 134 Cc 135 ) 136 137 func (t RecipientType) String() string { 138 return [...]string{"To", "Cc"}[t] 139 } 140 141 type RecipientInfo struct { 142 Address mail.Address 143 Type RecipientType 144 } 145 146 type Recipients []RecipientInfo 147 148 func (r Recipients) GetEmails(filter RecipientType) []string { 149 
emails := []string{} 150 for _, user := range r { 151 if user.Type == filter { 152 emails = append(emails, user.Address.Address) 153 } 154 } 155 sort.Strings(emails) 156 return emails 157 } 158 159 func NewRecipients(emails []string, t RecipientType) Recipients { 160 r := Recipients{} 161 for _, e := range emails { 162 r = append(r, RecipientInfo{mail.Address{Address: e}, t}) 163 } 164 sort.Sort(r) 165 return r 166 } 167 168 func (r Recipients) Len() int { return len(r) } 169 func (r Recipients) Less(i, j int) bool { return r[i].Address.Address < r[j].Address.Address } 170 func (r Recipients) Swap(i, j int) { r[i], r[j] = r[j], r[i] } 171 172 func (r Recipients) ToDash() dashapi.Recipients { 173 d := dashapi.Recipients{} 174 for _, user := range r { 175 d = append(d, dashapi.RecipientInfo{Address: user.Address, Type: dashapi.RecipientType(user.Type)}) 176 } 177 return d 178 } 179 180 type BisectResult int 181 182 const ( 183 BisectBad BisectResult = iota 184 BisectGood 185 BisectSkip 186 ) 187 188 type BisectEnv struct { 189 Compiler string 190 KernelConfig []byte 191 } 192 193 type RepoOpt int 194 195 const ( 196 // RepoPrecious is intended for command-line tools that work with a user-provided repo. 197 // Such repo won't be re-created to recover from errors, but rather return errors. 198 // If this option is not specified, the repo can be re-created from scratch to recover from any errors. 199 OptPrecious RepoOpt = iota 200 // Don't use sandboxing suitable for pkg/build. 
201 OptDontSandbox 202 ) 203 204 func NewRepo(os, vmType, dir string, opts ...RepoOpt) (Repo, error) { 205 switch os { 206 case targets.Linux: 207 if vmType == targets.Starnix { 208 return newFuchsia(dir, opts), nil 209 } 210 return newLinux(dir, opts, vmType), nil 211 case targets.Fuchsia: 212 return newFuchsia(dir, opts), nil 213 case targets.OpenBSD: 214 return newGitRepo(dir, nil, opts), nil 215 case targets.NetBSD: 216 return newGitRepo(dir, nil, opts), nil 217 case targets.FreeBSD: 218 return newGitRepo(dir, nil, opts), nil 219 case targets.TestOS: 220 return newTestos(dir, opts), nil 221 } 222 return nil, fmt.Errorf("vcs is unsupported for %v", os) 223 } 224 225 func NewSyzkallerRepo(dir string, opts ...RepoOpt) Repo { 226 git := newGitRepo(dir, nil, append(opts, OptDontSandbox)) 227 return git 228 } 229 230 func NewLKMLRepo(dir string) Repo { 231 return newGitRepo(dir, nil, []RepoOpt{OptDontSandbox}) 232 } 233 234 func Patch(dir string, patch []byte) error { 235 // Do --dry-run first to not mess with partially consistent state. 236 cmd := osutil.Command("patch", "-p1", "--force", "--ignore-whitespace", "--dry-run") 237 if err := osutil.Sandbox(cmd, true, true); err != nil { 238 return err 239 } 240 cmd.Stdin = bytes.NewReader(patch) 241 cmd.Dir = dir 242 if output, err := cmd.CombinedOutput(); err != nil { 243 // If it reverses clean, then it's already applied 244 // (seems to be the easiest way to detect it). 245 cmd = osutil.Command("patch", "-p1", "--force", "--ignore-whitespace", "--reverse", "--dry-run") 246 if err := osutil.Sandbox(cmd, true, true); err != nil { 247 return err 248 } 249 cmd.Stdin = bytes.NewReader(patch) 250 cmd.Dir = dir 251 if _, err := cmd.CombinedOutput(); err == nil { 252 return fmt.Errorf("patch is already applied") 253 } 254 return fmt.Errorf("failed to apply patch:\n%s", output) 255 } 256 // Now apply for real. 
257 cmd = osutil.Command("patch", "-p1", "--force", "--ignore-whitespace") 258 if err := osutil.Sandbox(cmd, true, true); err != nil { 259 return err 260 } 261 cmd.Stdin = bytes.NewReader(patch) 262 cmd.Dir = dir 263 if output, err := cmd.CombinedOutput(); err != nil { 264 return fmt.Errorf("failed to apply patch after dry run:\n%s", output) 265 } 266 return nil 267 } 268 269 // CheckRepoAddress does a best-effort approximate check of a git repo address. 270 func CheckRepoAddress(repo string) bool { 271 return gitLocalRepoRe.MatchString(repo) || 272 gitRemoteRepoRe.MatchString(repo) || 273 gitSSHRepoRe.MatchString(repo) 274 } 275 276 // CheckBranch does a best-effort approximate check of a git branch name. 277 func CheckBranch(branch string) bool { 278 return gitBranchRe.MatchString(branch) 279 } 280 281 func CheckCommitHash(hash string) bool { 282 return gitHashRe.MatchString(hash) 283 } 284 285 func ParseReleaseTag(tag string) (v1, v2, rc, v3 int) { 286 invalid := func() { 287 v1, v2, rc, v3 = -1, -1, -1, -1 288 } 289 invalid() 290 matches := releaseTagRe.FindStringSubmatch(tag) 291 if matches == nil { 292 return 293 } 294 for ptr, idx := range map[*int]int{ 295 &v1: 1, &v2: 2, &rc: 3, &v3: 4, 296 } { 297 if matches[idx] == "" { 298 continue 299 } 300 var err error 301 *ptr, err = strconv.Atoi(matches[idx]) 302 if err != nil { 303 invalid() 304 return 305 } 306 } 307 return 308 } 309 310 func runSandboxed(dir, command string, args ...string) ([]byte, error) { 311 cmd := osutil.Command(command, args...) 
312 cmd.Dir = dir 313 if err := osutil.Sandbox(cmd, true, false); err != nil { 314 return nil, err 315 } 316 return osutil.Run(time.Hour, cmd) 317 } 318 319 var ( 320 // nolint: lll 321 gitLocalRepoRe = regexp.MustCompile(`^file:///[a-zA-Z0-9-_./~]+(/)?$`) 322 // nolint: lll 323 gitRemoteRepoRe = regexp.MustCompile(`^(git|ssh|http|https|ftp|ftps|sso)://[a-zA-Z0-9-_.]+(:[0-9]+)?(/[a-zA-Z0-9-_./~]+)?(/)?$`) 324 // nolint: lll 325 gitSSHRepoRe = regexp.MustCompile(`^(git|ssh|http|https|ftp|ftps|sso)@[a-zA-Z0-9-_.]+(:[a-zA-Z0-9-_]+)?(/[a-zA-Z0-9-_./~]+)?(/)?$`) 326 gitBranchRe = regexp.MustCompile("^[a-zA-Z0-9-_/.]{2,200}$") 327 gitHashRe = regexp.MustCompile("^[a-f0-9]{8,40}$") 328 releaseTagRe = regexp.MustCompile(`^v([0-9]+).([0-9]+)(?:-rc([0-9]+))?(?:\.([0-9]+))?$`) 329 // CC: is intentionally not on this list, see #1441. 330 ccRes = []*regexp.Regexp{ 331 regexp.MustCompile(`^Reviewed\-.*: (.*)$`), 332 regexp.MustCompile(`^[A-Za-z-]+\-and\-[Rr]eviewed\-.*: (.*)$`), 333 regexp.MustCompile(`^Acked\-.*: (.*)$`), 334 regexp.MustCompile(`^[A-Za-z-]+\-and\-[Aa]cked\-.*: (.*)$`), 335 regexp.MustCompile(`^Tested\-.*: (.*)$`), 336 regexp.MustCompile(`^[A-Za-z-]+\-and\-[Tt]ested\-.*: (.*)$`), 337 regexp.MustCompile(`^Signed-off-by: (.*)$`), 338 } 339 ) 340 341 // CanonicalizeCommit returns commit title that can be used when checking 342 // if a particular commit is present in a git tree. 343 // Some trees add prefixes to commit titles during backporting, 344 // so we want e.g. commit "foo bar" match "BACKPORT: foo bar". 
func CanonicalizeCommit(title string) string {
	// Only the first matching prefix is removed; whatever follows it
	// (including another known prefix) is kept, modulo surrounding whitespace.
	for _, prefix := range commitPrefixes {
		if rest := strings.TrimPrefix(title, prefix); len(rest) != len(title) {
			return strings.TrimSpace(rest)
		}
	}
	return strings.TrimSpace(title)
}

// commitPrefixes lists the title prefixes that CanonicalizeCommit strips.
var commitPrefixes = []string{
	"UPSTREAM:",
	"CHROMIUM:",
	"FROMLIST:",
	"BACKPORT:",
	"FROMGIT:",
	"net-backports:",
}

const (
	// SyzkallerRepo is the URL of the syzkaller repository.
	SyzkallerRepo = "https://github.com/google/syzkaller"

	// HEAD is the commit name "HEAD".
	HEAD = "HEAD"
)

// CommitLink returns a web link to the commit hash in the repository url,
// or "" if either argument is empty or the hosting is not recognized.
func CommitLink(url, hash string) string {
	return link(url, hash, "", 0, 0)
}

// TreeLink returns a web link to the source tree at hash, or "" (see CommitLink).
// Used externally - do not remove.
func TreeLink(url, hash string) string {
	return link(url, hash, "", 0, 1)
}

// LogLink returns a web link to the commit log at hash, or "" (see CommitLink).
func LogLink(url, hash string) string {
	return link(url, hash, "", 0, 2)
}

// FileLink returns a web link to the given line of file at hash, or "" (see CommitLink).
func FileLink(url, hash, file string, line int) string {
	return link(url, hash, file, line, 3)
}

// link builds the web link for the repository url at hash.
// typ selects the kind of link: 1 is the tree, 2 is the log, 3 is file:line,
// and any other value is the commit itself.
// Recognized hostings are github.com, git.kernel.org/pub/scm, the cgit hosts
// git.kernel.dk and git.breakpoint.cc, and *.googlesource.com; for anything
// else (or an empty url/hash) the result is "".
// nolint: goconst
func link(url, hash, file string, line, typ int) string {
	if url == "" || hash == "" {
		return ""
	}
	if url == "https://fuchsia.googlesource.com" {
		// We collect hashes from the fuchsia repo.
		return link(url+"/fuchsia", hash, file, line, typ)
	}
	lineNo := fmt.Sprint(line)
	// pick returns the candidate that corresponds to typ.
	pick := func(commit, tree, log, blob string) string {
		switch typ {
		case 1:
			return tree
		case 2:
			return log
		case 3:
			return blob
		}
		return commit
	}
	// cgit renders the links for hosts that use the "?id=" query scheme.
	cgit := func(base string) string {
		query := "?id=" + hash
		return pick(
			base+"/commit/"+query,
			base+"/tree/"+query,
			base+"/log/"+query,
			base+"/tree/"+file+query+"#n"+lineNo,
		)
	}

	if strings.HasPrefix(url, "https://github.com/") {
		base := strings.TrimSuffix(url, ".git")
		return pick(
			base+"/commit/"+hash,
			base+"/tree/"+hash,
			base+"/commits/"+hash,
			base+"/blob/"+hash+"/"+file+"#L"+lineNo,
		)
	}

	const kernelOrgPath = "://git.kernel.org/pub/scm/"
	if strings.HasPrefix(url, "https"+kernelOrgPath) || strings.HasPrefix(url, "git"+kernelOrgPath) {
		// Whatever the original scheme was, the web link uses https.
		return cgit("https" + url[strings.Index(url, "://"):])
	}

	for _, host := range []string{"git.kernel.dk", "git.breakpoint.cc"} {
		for _, scheme := range []string{"https://", "git://"} {
			if prefix := scheme + host; strings.HasPrefix(url, prefix) {
				// These hosts serve the web UI under the /cgit path.
				return cgit("https://" + host + "/cgit" + url[len(prefix):])
			}
		}
	}

	if strings.HasPrefix(url, "https://") && strings.Contains(url, ".googlesource.com") {
		at := url + "/+/" + hash
		return pick(
			at+"^!",
			at+"/",
			url+"/+log/"+hash,
			at+"/"+file+"#"+lineNo,
		)
	}
	return ""
}