github.com/google/syzkaller@v0.0.0-20240517125934-c0f1611a36d6/pkg/vcs/vcs.go (about) 1 // Copyright 2018 syzkaller project authors. All rights reserved. 2 // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 3 4 // Package vcs provides helper functions for working with various repositories (e.g. git). 5 package vcs 6 7 import ( 8 "bytes" 9 "fmt" 10 "net/mail" 11 "regexp" 12 "sort" 13 "strconv" 14 "strings" 15 "time" 16 17 "github.com/google/syzkaller/dashboard/dashapi" 18 "github.com/google/syzkaller/pkg/debugtracer" 19 "github.com/google/syzkaller/pkg/osutil" 20 "github.com/google/syzkaller/pkg/report/crash" 21 "github.com/google/syzkaller/sys/targets" 22 ) 23 24 type Repo interface { 25 // Poll checkouts the specified repository/branch. 26 // This involves fetching/resetting/cloning as necessary to recover from all possible problems. 27 // Returns hash of the HEAD commit in the specified branch. 28 Poll(repo, branch string) (*Commit, error) 29 30 // CheckoutBranch checkouts the specified repository/branch. 31 CheckoutBranch(repo, branch string) (*Commit, error) 32 33 // CheckoutCommit checkouts the specified repository on the specified commit. 34 CheckoutCommit(repo, commit string) (*Commit, error) 35 36 // SwitchCommit checkouts the specified commit without fetching. 37 SwitchCommit(commit string) (*Commit, error) 38 39 // HeadCommit returns info about the HEAD commit of the current branch of git repository. 40 HeadCommit() (*Commit, error) 41 42 // GetCommitByTitle finds commit info by the title. If the commit is not found, nil is returned. 43 // Remote is not fetched and only commits reachable from the checked out HEAD are searched 44 // (e.g. do CheckoutBranch before). 45 GetCommitByTitle(title string) (*Commit, error) 46 47 // GetCommitsByTitles is a batch version of GetCommitByTitle. 48 // Returns list of commits and titles of commits that are not found. 49 GetCommitsByTitles(titles []string) ([]*Commit, []string, error) 50 51 // ExtractFixTagsFromCommits extracts fixing tags for bugs from git log. 52 // Given email = "user@domain.com", it searches for tags of the form "user+tag@domain.com" 53 // and returns commits with these tags. 54 ExtractFixTagsFromCommits(baseCommit, email string) ([]*Commit, error) 55 56 // ReleaseTag returns the latest release tag that is reachable from the given commit. 57 ReleaseTag(commit string) (string, error) 58 59 // Returns true if the current tree contains the specified commit. 60 // Remote is not fetched and only commits reachable from the checked out HEAD are searched 61 // (e.g. do CheckoutBranch before). 62 Contains(commit string) (bool, error) 63 64 // ListCommitHashes lists all commit hashes reachable from baseCommit. 65 ListCommitHashes(baseCommit string) ([]string, error) 66 67 // Object returns the contents of a git repository object at the particular moment in history. 68 Object(name, commit string) ([]byte, error) 69 70 // MergeBases returns good common ancestors of the two commits. 71 MergeBases(firstCommit, secondCommit string) ([]*Commit, error) 72 } 73 74 // Bisecter may be optionally implemented by Repo. 75 type Bisecter interface { 76 // Can be used for last minute preparations like pulling release tags into the bisected repo, which 77 // is required to determin the compiler version to use on linux. Can be an empty function. 78 PrepareBisect() error 79 80 // Bisect bisects good..bad commit range against the provided predicate (wrapper around git bisect). 81 // The predicate should return an error only if there is no way to proceed 82 // (it will abort the process), if possible it should prefer to return BisectSkip. 83 // Progress of the process is streamed to the provided trace. 84 // Returns the first commit on which the predicate returns BisectBad, 85 // or multiple commits if bisection is inconclusive due to BisectSkip. 86 Bisect(bad, good string, dt debugtracer.DebugTracer, pred func() (BisectResult, error)) ([]*Commit, error) 87 88 // PreviousReleaseTags returns list of preceding release tags that are reachable from the given commit. 89 // If the commit itself has a release tag, this tag is not included. 90 PreviousReleaseTags(commit, compilerType string) ([]string, error) 91 92 IsRelease(commit string) (bool, error) 93 94 EnvForCommit(defaultCompiler, compilerType, binDir, commit string, 95 kernelConfig []byte, backports []BackportCommit) (*BisectEnv, error) 96 } 97 98 type ConfigMinimizer interface { 99 Minimize(target *targets.Target, original, baseline []byte, types []crash.Type, 100 dt debugtracer.DebugTracer, pred func(test []byte) (BisectResult, error)) ([]byte, error) 101 } 102 103 type Commit struct { 104 Hash string 105 Title string 106 Author string 107 AuthorName string 108 Recipients Recipients 109 Tags []string 110 Parents []string 111 Date time.Time 112 CommitDate time.Time 113 } 114 115 type RecipientType int 116 117 const ( 118 To RecipientType = iota 119 Cc 120 ) 121 122 func (t RecipientType) String() string { 123 return [...]string{"To", "Cc"}[t] 124 } 125 126 type RecipientInfo struct { 127 Address mail.Address 128 Type RecipientType 129 } 130 131 type Recipients []RecipientInfo 132 133 func (r Recipients) GetEmails(filter RecipientType) []string { 134 emails := []string{} 135 for _, user := range r { 136 if user.Type == filter { 137 emails = append(emails, user.Address.Address) 138 } 139 } 140 sort.Strings(emails) 141 return emails 142 } 143 144 func NewRecipients(emails []string, t RecipientType) Recipients { 145 r := Recipients{} 146 for _, e := range emails { 147 r = append(r, RecipientInfo{mail.Address{Address: e}, t}) 148 } 149 sort.Sort(r) 150 return r 151 } 152 153 func (r Recipients) Len() int { return len(r) } 154 func (r Recipients) Less(i, j int) bool { return r[i].Address.Address < r[j].Address.Address } 155 func (r Recipients) Swap(i, j int) { r[i], r[j] = r[j], r[i] } 156 157 func (r Recipients) ToDash() dashapi.Recipients { 158 d := dashapi.Recipients{} 159 for _, user := range r { 160 d = append(d, dashapi.RecipientInfo{Address: user.Address, Type: dashapi.RecipientType(user.Type)}) 161 } 162 return d 163 } 164 165 type BisectResult int 166 167 const ( 168 BisectBad BisectResult = iota 169 BisectGood 170 BisectSkip 171 ) 172 173 type BisectEnv struct { 174 Compiler string 175 KernelConfig []byte 176 } 177 178 type RepoOpt int 179 180 const ( 181 // RepoPrecious is intended for command-line tools that work with a user-provided repo. 182 // Such repo won't be re-created to recover from errors, but rather return errors. 183 // If this option is not specified, the repo can be re-created from scratch to recover from any errors. 184 OptPrecious RepoOpt = iota 185 // Don't use sandboxing suitable for pkg/build. 186 OptDontSandbox 187 ) 188 189 func NewRepo(os, vmType, dir string, opts ...RepoOpt) (Repo, error) { 190 switch os { 191 case targets.Linux: 192 return newLinux(dir, opts, vmType), nil 193 case targets.Fuchsia: 194 return newFuchsia(dir, opts), nil 195 case targets.OpenBSD: 196 return newGit(dir, nil, opts), nil 197 case targets.NetBSD: 198 return newGit(dir, nil, opts), nil 199 case targets.FreeBSD: 200 return newGit(dir, nil, opts), nil 201 case targets.TestOS: 202 return newTestos(dir, opts), nil 203 } 204 return nil, fmt.Errorf("vcs is unsupported for %v", os) 205 } 206 207 func NewSyzkallerRepo(dir string, opts ...RepoOpt) Repo { 208 git := newGit(dir, nil, append(opts, OptDontSandbox)) 209 return git 210 } 211 212 func NewLKMLRepo(dir string) Repo { 213 return newGit(dir, nil, []RepoOpt{OptDontSandbox}) 214 } 215 216 func Patch(dir string, patch []byte) error { 217 // Do --dry-run first to not mess with partially consistent state. 218 cmd := osutil.Command("patch", "-p1", "--force", "--ignore-whitespace", "--dry-run") 219 if err := osutil.Sandbox(cmd, true, true); err != nil { 220 return err 221 } 222 cmd.Stdin = bytes.NewReader(patch) 223 cmd.Dir = dir 224 if output, err := cmd.CombinedOutput(); err != nil { 225 // If it reverses clean, then it's already applied 226 // (seems to be the easiest way to detect it). 227 cmd = osutil.Command("patch", "-p1", "--force", "--ignore-whitespace", "--reverse", "--dry-run") 228 if err := osutil.Sandbox(cmd, true, true); err != nil { 229 return err 230 } 231 cmd.Stdin = bytes.NewReader(patch) 232 cmd.Dir = dir 233 if _, err := cmd.CombinedOutput(); err == nil { 234 return fmt.Errorf("patch is already applied") 235 } 236 return fmt.Errorf("failed to apply patch:\n%s", output) 237 } 238 // Now apply for real. 239 cmd = osutil.Command("patch", "-p1", "--force", "--ignore-whitespace") 240 if err := osutil.Sandbox(cmd, true, true); err != nil { 241 return err 242 } 243 cmd.Stdin = bytes.NewReader(patch) 244 cmd.Dir = dir 245 if output, err := cmd.CombinedOutput(); err != nil { 246 return fmt.Errorf("failed to apply patch after dry run:\n%s", output) 247 } 248 return nil 249 } 250 251 // CheckRepoAddress does a best-effort approximate check of a git repo address. 252 func CheckRepoAddress(repo string) bool { 253 return gitLocalRepoRe.MatchString(repo) || 254 gitRemoteRepoRe.MatchString(repo) || 255 gitSSHRepoRe.MatchString(repo) 256 } 257 258 // CheckBranch does a best-effort approximate check of a git branch name. 259 func CheckBranch(branch string) bool { 260 return gitBranchRe.MatchString(branch) 261 } 262 263 func CheckCommitHash(hash string) bool { 264 return gitHashRe.MatchString(hash) 265 } 266 267 func ParseReleaseTag(tag string) (v1, v2, rc, v3 int) { 268 invalid := func() { 269 v1, v2, rc, v3 = -1, -1, -1, -1 270 } 271 invalid() 272 matches := releaseTagRe.FindStringSubmatch(tag) 273 if matches == nil { 274 return 275 } 276 for ptr, idx := range map[*int]int{ 277 &v1: 1, &v2: 2, &rc: 3, &v3: 4, 278 } { 279 if matches[idx] == "" { 280 continue 281 } 282 var err error 283 *ptr, err = strconv.Atoi(matches[idx]) 284 if err != nil { 285 invalid() 286 return 287 } 288 } 289 return 290 } 291 292 func runSandboxed(dir, command string, args ...string) ([]byte, error) { 293 cmd := osutil.Command(command, args...) 294 cmd.Dir = dir 295 if err := osutil.Sandbox(cmd, true, false); err != nil { 296 return nil, err 297 } 298 return osutil.Run(time.Hour, cmd) 299 } 300 301 var ( 302 // nolint: lll 303 gitLocalRepoRe = regexp.MustCompile(`^file:///[a-zA-Z0-9-_./~]+(/)?$`) 304 // nolint: lll 305 gitRemoteRepoRe = regexp.MustCompile(`^(git|ssh|http|https|ftp|ftps|sso)://[a-zA-Z0-9-_.]+(:[0-9]+)?(/[a-zA-Z0-9-_./~]+)?(/)?$`) 306 // nolint: lll 307 gitSSHRepoRe = regexp.MustCompile(`^(git|ssh|http|https|ftp|ftps|sso)@[a-zA-Z0-9-_.]+(:[a-zA-Z0-9-_]+)?(/[a-zA-Z0-9-_./~]+)?(/)?$`) 308 gitBranchRe = regexp.MustCompile("^[a-zA-Z0-9-_/.]{2,200}$") 309 gitHashRe = regexp.MustCompile("^[a-f0-9]{8,40}$") 310 releaseTagRe = regexp.MustCompile(`^v([0-9]+).([0-9]+)(?:-rc([0-9]+))?(?:\.([0-9]+))?$`) 311 // CC: is intentionally not on this list, see #1441. 312 ccRes = []*regexp.Regexp{ 313 regexp.MustCompile(`^Reviewed\-.*: (.*)$`), 314 regexp.MustCompile(`^[A-Za-z-]+\-and\-[Rr]eviewed\-.*: (.*)$`), 315 regexp.MustCompile(`^Acked\-.*: (.*)$`), 316 regexp.MustCompile(`^[A-Za-z-]+\-and\-[Aa]cked\-.*: (.*)$`), 317 regexp.MustCompile(`^Tested\-.*: (.*)$`), 318 regexp.MustCompile(`^[A-Za-z-]+\-and\-[Tt]ested\-.*: (.*)$`), 319 regexp.MustCompile(`^Signed-off-by: (.*)$`), 320 } 321 ) 322 323 // CanonicalizeCommit returns commit title that can be used when checking 324 // if a particular commit is present in a git tree. 325 // Some trees add prefixes to commit titles during backporting, 326 // so we want e.g. commit "foo bar" match "BACKPORT: foo bar". 327 func CanonicalizeCommit(title string) string { 328 for _, prefix := range commitPrefixes { 329 if strings.HasPrefix(title, prefix) { 330 title = title[len(prefix):] 331 break 332 } 333 } 334 return strings.TrimSpace(title) 335 } 336 337 var commitPrefixes = []string{ 338 "UPSTREAM:", 339 "CHROMIUM:", 340 "FROMLIST:", 341 "BACKPORT:", 342 "FROMGIT:", 343 "net-backports:", 344 } 345 346 const SyzkallerRepo = "https://github.com/google/syzkaller" 347 348 const HEAD = "HEAD" 349 350 func CommitLink(url, hash string) string { 351 return link(url, hash, "", 0, 0) 352 } 353 354 func TreeLink(url, hash string) string { 355 return link(url, hash, "", 0, 1) 356 } 357 358 func LogLink(url, hash string) string { 359 return link(url, hash, "", 0, 2) 360 } 361 362 func FileLink(url, hash, file string, line int) string { 363 return link(url, hash, file, line, 3) 364 } 365 366 // nolint: goconst 367 func link(url, hash, file string, line, typ int) string { 368 if url == "" || hash == "" { 369 return "" 370 } 371 switch url { 372 case "https://fuchsia.googlesource.com": 373 // We collect hashes from the fuchsia repo. 374 return link(url+"/fuchsia", hash, file, line, typ) 375 } 376 if strings.HasPrefix(url, "https://github.com/") { 377 url = strings.TrimSuffix(url, ".git") 378 switch typ { 379 case 1: 380 return url + "/tree/" + hash 381 case 2: 382 return url + "/commits/" + hash 383 case 3: 384 return url + "/blob/" + hash + "/" + file + "#L" + fmt.Sprint(line) 385 default: 386 return url + "/commit/" + hash 387 } 388 } 389 if strings.HasPrefix(url, "https://git.kernel.org/pub/scm/") || 390 strings.HasPrefix(url, "git://git.kernel.org/pub/scm/") { 391 url = strings.TrimPrefix(url, "git") 392 url = strings.TrimPrefix(url, "https") 393 url = "https" + url 394 switch typ { 395 case 1: 396 return url + "/tree/?id=" + hash 397 case 2: 398 return url + "/log/?id=" + hash 399 case 3: 400 return url + "/tree/" + file + "?id=" + hash + "#n" + fmt.Sprint(line) 401 default: 402 return url + "/commit/?id=" + hash 403 } 404 } 405 for _, cgitHost := range []string{"git.kernel.dk", "git.breakpoint.cc"} { 406 if strings.HasPrefix(url, "https://"+cgitHost) || 407 strings.HasPrefix(url, "git://"+cgitHost) { 408 url = strings.TrimPrefix(strings.TrimPrefix(url, "git://"), "https://") 409 url = strings.TrimPrefix(url, cgitHost) 410 url = "https://" + cgitHost + "/cgit" + url 411 switch typ { 412 case 1: 413 return url + "/tree/?id=" + hash 414 case 2: 415 return url + "/log/?id=" + hash 416 case 3: 417 return url + "/tree/" + file + "?id=" + hash + "#n" + fmt.Sprint(line) 418 default: 419 return url + "/commit/?id=" + hash 420 } 421 } 422 } 423 if strings.HasPrefix(url, "https://") && strings.Contains(url, ".googlesource.com") { 424 switch typ { 425 case 1: 426 return url + "/+/" + hash + "/" 427 case 2: 428 return url + "/+log/" + hash 429 case 3: 430 return url + "/+/" + hash + "/" + file + "#" + fmt.Sprint(line) 431 default: 432 return url + "/+/" + hash + "^!" 433 } 434 } 435 return "" 436 }