github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/syz-ci/manager.go

// Copyright 2017 syzkaller project authors. All rights reserved.
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.

package main

import (
	"compress/gzip"
	"context"
	"crypto/sha256"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"math/rand"
	"net/http"
	"net/url"
	"os"
	"path"
	"path/filepath"
	"regexp"
	"strings"
	"time"

	"github.com/google/syzkaller/dashboard/dashapi"
	"github.com/google/syzkaller/pkg/asset"
	"github.com/google/syzkaller/pkg/build"
	"github.com/google/syzkaller/pkg/config"
	"github.com/google/syzkaller/pkg/cover"
	"github.com/google/syzkaller/pkg/gcs"
	"github.com/google/syzkaller/pkg/hash"
	"github.com/google/syzkaller/pkg/instance"
	"github.com/google/syzkaller/pkg/log"
	"github.com/google/syzkaller/pkg/mgrconfig"
	"github.com/google/syzkaller/pkg/osutil"
	"github.com/google/syzkaller/pkg/report"
	"github.com/google/syzkaller/pkg/updater"
	"github.com/google/syzkaller/pkg/vcs"
	"github.com/google/syzkaller/prog"
	_ "github.com/google/syzkaller/sys"
	"github.com/google/syzkaller/sys/targets"
	"golang.org/x/sync/errgroup"
)

// This is intentionally slightly longer than the syzkaller rebuild period.
// If we set kernelRebuildPeriod = updater.RebuildPeriod and both are changed
// during that period (or around that period), we could rebuild the kernel, restart
// the manager and then instantly shut everything down for the syzkaller update.
// Instead we rebuild syzkaller, restart and then rebuild the kernel.
const kernelRebuildPeriod = updater.RebuildPeriod + time.Hour

// List of required files in a kernel build (contents of latest/current dirs).
var imageFiles = map[string]bool{
	"tag":           true,  // serialized BuildInfo
	"kernel.config": false, // kernel config used for build
	"image":         true,  // kernel image
	"kernel":        false,
	"initrd":        false,
	"key":           false, // root ssh key for the image
}

func init() {
	for _, arches := range targets.List {
		for _, arch := range arches {
			if arch.KernelObject != "" {
				imageFiles["obj/"+arch.KernelObject] = false
			}
		}
	}
}

// Manager represents a single syz-manager instance.
// Handles kernel polling, image rebuild and manager process management.
// As the syzkaller builder, it maintains 2 builds:
//  - latest: latest known good kernel build
//  - current: currently used kernel build
type Manager struct {
	name           string
	workDir        string
	kernelBuildDir string
	kernelSrcDir   string
	currentDir     string
	latestDir      string
	configTag      string
	configData     []byte
	cfg            *Config
	repo           vcs.Repo
	mgrcfg         *ManagerConfig
	managercfg     *mgrconfig.Config
	cmd            *ManagerCmd
	dash           ManagerDashapi
	debugStorage   bool
	storage        *asset.Storage
	debug          bool
	lastBuild      *dashapi.Build
	buildFailed    bool
	lastRestarted  time.Time
}

type ManagerDashapi interface {
	ReportBuildError(req *dashapi.BuildErrorReq) error
	UploadBuild(build *dashapi.Build) error
	BuilderPoll(manager string) (*dashapi.BuilderPollResp, error)
	LogError(name, msg string, args ...interface{})
	CommitPoll() (*dashapi.CommitPollResp, error)
	UploadCommits(commits []dashapi.Commit) error
}

func createManager(cfg *Config, mgrcfg *ManagerConfig, debug bool) (*Manager, error) {
	dir := osutil.Abs(filepath.Join("managers", mgrcfg.Name))
	err := osutil.MkdirAll(dir)
	if err != nil {
		log.Fatal(err)
	}
	if mgrcfg.RepoAlias == "" {
		mgrcfg.RepoAlias = mgrcfg.Repo
	}

	var dash *dashapi.Dashboard
	if cfg.DashboardAddr != "" && mgrcfg.DashboardClient != "" {
		dash, err = dashapi.New(mgrcfg.DashboardClient, cfg.DashboardAddr, mgrcfg.DashboardKey)
		if err != nil {
			return nil, err
		}
	}
	var assetStorage *asset.Storage
	if !cfg.AssetStorage.IsEmpty() {
		assetStorage, err = asset.StorageFromConfig(cfg.AssetStorage, dash)
		if err != nil {
			log.Fatalf("failed to create asset storage: %v", err)
		}
	}
	var configData []byte
	if mgrcfg.KernelConfig != "" {
		if configData, err = os.ReadFile(mgrcfg.KernelConfig); err != nil {
			return nil, err
		}
	}
	kernelDir := filepath.Join(dir, "kernel")
	repo, err := vcs.NewRepo(mgrcfg.managercfg.TargetOS, mgrcfg.managercfg.Type, kernelDir)
	if err != nil {
		log.Fatalf("failed to create repo for %v: %v", mgrcfg.Name, err)
	}

	mgr := &Manager{
		name:           mgrcfg.managercfg.Name,
		workDir:        filepath.Join(dir, "workdir"),
		kernelSrcDir:   path.Join(kernelDir, mgrcfg.KernelSrcSuffix),
		kernelBuildDir: kernelDir,
		currentDir:     filepath.Join(dir, "current"),
		latestDir:      filepath.Join(dir, "latest"),
		configTag:      hash.String(configData),
		configData:     configData,
		cfg:            cfg,
		repo:           repo,
		mgrcfg:         mgrcfg,
		managercfg:     mgrcfg.managercfg,
		storage:        assetStorage,
		debugStorage:   !cfg.AssetStorage.IsEmpty() && cfg.AssetStorage.Debug,
		debug:          debug,
	}
	// Leave the dashboard interface value as nil if it does not wrap a valid dashboard pointer.
	if dash != nil {
		mgr.dash = dash
	}

	os.RemoveAll(mgr.currentDir)
	return mgr, nil
}

// Gates kernel builds, syzkaller builds and coverage report generation.
// Kernel builds take the whole machine, so we don't run more than one at a time.
// Also the current image build script uses some global resources (/dev/nbd0) and can't run in parallel.
var buildSem = osutil.NewSemaphore(1)

// Gates tests that require extra VMs.
// Currently we overcommit instances in such cases, so we'd like to minimize the number of
// simultaneous env.Test calls.
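// (testImage, for instance, runs its smoke test with 3 VMs; see
// instance.OverrideVMCount in createTestConfig.)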
var testSem = osutil.NewSemaphore(1)

const fuzzingMinutesBeforeCover = 360
const benchUploadPeriod = 30 * time.Minute

func (mgr *Manager) loop(ctx context.Context) {
	lastCommit := ""
	nextBuildTime := time.Now()
	var managerRestartTime, artifactUploadTime, benchUploadTime time.Time
	latestInfo := mgr.checkLatest()
	if latestInfo != nil && time.Since(latestInfo.Time) < kernelRebuildPeriod/2 &&
		mgr.managercfg.TargetOS != targets.Fuchsia {
		// If we have a reasonably fresh build,
		// start the manager straight away and don't rebuild the kernel for a while.
		// Fuchsia is a special case: it builds with syz-executor, so if we just updated syzkaller, we need
		// to rebuild fuchsia as well.
		log.Logf(0, "%v: using latest image built on %v", mgr.name, latestInfo.KernelCommit)
		managerRestartTime = latestInfo.Time
		nextBuildTime = time.Now().Add(kernelRebuildPeriod)
		mgr.restartManager()
	} else if latestInfo != nil {
		log.Logf(0, "%v: latest image is on %v", mgr.name, latestInfo.KernelCommit)
	}

	benchUploadTime = time.Now().Add(benchUploadPeriod)

	ticker := time.NewTicker(updater.BuildRetryPeriod)
	defer ticker.Stop()

loop:
	for {
		if time.Since(nextBuildTime) >= 0 {
			var rebuildAfter time.Duration
			lastCommit, latestInfo, rebuildAfter = mgr.pollAndBuild(ctx, lastCommit, latestInfo)
			nextBuildTime = time.Now().Add(rebuildAfter)
		}
		if !artifactUploadTime.IsZero() && time.Now().After(artifactUploadTime) {
			artifactUploadTime = time.Time{}
			if err := mgr.uploadCoverReport(ctx); err != nil {
				mgr.Errorf("failed to upload cover report: %v", err)
			}
			if err := mgr.uploadProgramsWithCoverage(ctx); err != nil {
				mgr.Errorf("failed to upload programs with coverage: %v", err)
			}
			// uploadCoverStat also forces the manager to drop the coverage structures to reduce memory usage.
			// It should be the last request touching the coverage data.
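			// The corpus upload below only reads corpus.db from the workdir,
			// so it is safe to run after the coverage data has been dropped.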
			if err := mgr.uploadCoverStat(ctx, fuzzingMinutesBeforeCover); err != nil {
				mgr.Errorf("failed to upload coverage stat: %v", err)
			}
			if err := mgr.uploadCorpus(); err != nil {
				mgr.Errorf("failed to upload corpus: %v", err)
			}
		}
		if mgr.cfg.BenchUploadPath != "" && time.Now().After(benchUploadTime) {
			benchUploadTime = time.Now().Add(benchUploadPeriod)
			if err := mgr.uploadBenchData(ctx); err != nil {
				mgr.Errorf("failed to upload bench: %v", err)
			}
		}

		select {
		case <-ctx.Done():
			break loop
		default:
		}

		if latestInfo != nil && (latestInfo.Time != managerRestartTime || mgr.cmd == nil) {
			managerRestartTime = latestInfo.Time
			mgr.restartManager()
			if mgr.cmd != nil {
				artifactUploadTime = time.Now().Add(fuzzingMinutesBeforeCover * time.Minute)
			}
		}

		select {
		case <-ticker.C:
		case <-ctx.Done():
			break loop
		}
	}

	if mgr.cmd != nil {
		mgr.cmd.Close()
		mgr.cmd = nil
	}
	log.Logf(0, "%v: stopped", mgr.name)
}

func (mgr *Manager) archiveCommit(commit string) {
	if mgr.cfg.GitArchive == "" || mgr.mgrcfg.DisableGitArchive {
		return
	}
	if err := mgr.repo.PushCommit(mgr.cfg.GitArchive, commit); err != nil {
		mgr.Errorf("%v: failed to archive commit %s from repo %s: %s",
			mgr.name, commit, mgr.mgrcfg.Repo, err.Error())
	}
}

func (mgr *Manager) pollAndBuild(ctx context.Context, lastCommit string, latestInfo *BuildInfo) (
	string, *BuildInfo, time.Duration) {
	rebuildAfter := updater.BuildRetryPeriod
	commit, err := mgr.repo.Poll(mgr.mgrcfg.Repo, mgr.mgrcfg.Branch)
	if err != nil {
		mgr.buildFailed = true
		mgr.Errorf("failed to poll: %v", err)
	} else {
		log.Logf(0, "%v: poll: %v", mgr.name, commit.Hash)
		needsUpdate := (latestInfo == nil ||
			commit.Hash != latestInfo.KernelCommit ||
			mgr.configTag != latestInfo.KernelConfigTag)
		mgr.buildFailed = needsUpdate
		if commit.Hash != lastCommit && needsUpdate {
			lastCommit = commit.Hash
			select {
			case <-buildSem.WaitC():
				log.Logf(0, "%v: building kernel...", mgr.name)
				if err := mgr.build(commit); err != nil {
					log.Logf(0, "%v: %v", mgr.name, err)
				} else {
					log.Logf(0, "%v: build successful", mgr.name)
					mgr.archiveCommit(lastCommit)
					log.Logf(0, "%v: [re]starting manager", mgr.name)
					mgr.buildFailed = false
					rebuildAfter = kernelRebuildPeriod
					latestInfo = mgr.checkLatest()
					if latestInfo == nil {
						mgr.Errorf("failed to read build info after build")
					}
				}
				buildSem.Signal()
			case <-ctx.Done():
			}
		}
	}
	return lastCommit, latestInfo, rebuildAfter
}

// BuildInfo characterizes a kernel build.
type BuildInfo struct {
	Time              time.Time // when the build was done
	Tag               string    // unique tag combined from compiler id, kernel commit and config tag
	CompilerID        string    // compiler identity string (e.g. "gcc 7.1.1")
	KernelRepo        string
	KernelBranch      string
	KernelCommit      string // git hash of kernel checkout
	KernelCommitTitle string
	KernelCommitDate  time.Time
	KernelConfigTag   string // SHA1 hash of .config contents
}

func loadBuildInfo(dir string) (*BuildInfo, error) {
	info := new(BuildInfo)
	if err := config.LoadFile(filepath.Join(dir, "tag"), info); err != nil {
		return nil, err
	}
	return info, nil
}

// checkLatest checks if we have a good working latest build and returns its build info.
// If the build is missing/broken, nil is returned.
func (mgr *Manager) checkLatest() *BuildInfo {
	if !osutil.FilesExist(mgr.latestDir, imageFiles) {
		return nil
	}
	info, _ := loadBuildInfo(mgr.latestDir)
	return info
}

func (mgr *Manager) createBuildInfo(kernelCommit *vcs.Commit, compilerID string) *BuildInfo {
	var tagData []byte
	tagData = append(tagData, mgr.name...)
	tagData = append(tagData, kernelCommit.Hash...)
	tagData = append(tagData, compilerID...)
	tagData = append(tagData, mgr.configTag...)
	return &BuildInfo{
		Time:              time.Now(),
		Tag:               hash.String(tagData),
		CompilerID:        compilerID,
		KernelRepo:        mgr.mgrcfg.Repo,
		KernelBranch:      mgr.mgrcfg.Branch,
		KernelCommit:      kernelCommit.Hash,
		KernelCommitTitle: kernelCommit.Title,
		KernelCommitDate:  kernelCommit.CommitDate,
		KernelConfigTag:   mgr.configTag,
	}
}

func (mgr *Manager) build(kernelCommit *vcs.Commit) error {
	// We first form the whole image in a tmp dir and then rename it to latest.
	tmpDir := mgr.latestDir + ".tmp"
	if err := os.RemoveAll(tmpDir); err != nil {
		return fmt.Errorf("failed to remove tmp dir: %w", err)
	}
	if err := osutil.MkdirAll(tmpDir); err != nil {
		return fmt.Errorf("failed to create tmp dir: %w", err)
	}
	params := build.Params{
		TargetOS:     mgr.managercfg.TargetOS,
		TargetArch:   mgr.managercfg.TargetVMArch,
		VMType:       mgr.managercfg.Type,
		KernelDir:    mgr.kernelBuildDir,
		OutputDir:    tmpDir,
		Make:         mgr.mgrcfg.Make,
		Compiler:     mgr.mgrcfg.Compiler,
		Linker:       mgr.mgrcfg.Linker,
		Ccache:       mgr.mgrcfg.Ccache,
		UserspaceDir: mgr.mgrcfg.Userspace,
		CmdlineFile:  mgr.mgrcfg.KernelCmdline,
		SysctlFile:   mgr.mgrcfg.KernelSysctl,
		Config:       mgr.configData,
		Build:        mgr.mgrcfg.Build,
		BuildCPUs:    mgr.cfg.BuildCPUs,
	}
	details, err := build.Image(params)
	info := mgr.createBuildInfo(kernelCommit, details.CompilerID)
	if err != nil {
		rep := &report.Report{
			Title: fmt.Sprintf("%v build error", mgr.mgrcfg.RepoAlias),
		}
		var kernelError *build.KernelError
		var verboseError *osutil.VerboseError
		switch {
		case errors.As(err, &kernelError):
			rep.Report = kernelError.Report
			rep.Output = kernelError.Output
			rep.Recipients = kernelError.Recipients
		case errors.As(err, &verboseError):
			rep.Report = []byte(verboseError.Error())
			rep.Output = verboseError.Output
		case errors.As(err, &build.InfraError{}):
			return err
		default:
			rep.Report = []byte(err.Error())
		}
		if err := mgr.reportBuildError(rep, info, tmpDir); err != nil {
			mgr.Errorf("failed to report image error: %v", err)
		}
		return fmt.Errorf("kernel build failed: %w", err)
	}

	if err := config.SaveFile(filepath.Join(tmpDir, "tag"), info); err != nil {
		return fmt.Errorf("failed to write tag file: %w", err)
	}

	if err := mgr.testImage(tmpDir, info); err != nil {
		return err
	}

	// Now try to replace latest with our tmp dir as atomically as we can get on Linux.
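	// Remove+rename is not truly atomic: a crash between the two calls leaves
	// no usable latest build, but checkLatest tolerates that by verifying that
	// all imageFiles exist before trusting the directory.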
	if err := os.RemoveAll(mgr.latestDir); err != nil {
		return fmt.Errorf("failed to remove latest dir: %w", err)
	}
	return osutil.Rename(tmpDir, mgr.latestDir)
}

const benchFileName = "bench.json"

func (mgr *Manager) restartManager() {
	if !osutil.FilesExist(mgr.latestDir, imageFiles) {
		mgr.Errorf("can't start manager, image files missing")
		return
	}
	if mgr.cmd != nil {
		mgr.cmd.Close()
		mgr.cmd = nil
	}
	if err := osutil.LinkFiles(mgr.latestDir, mgr.currentDir, imageFiles); err != nil {
		mgr.Errorf("failed to create current image dir: %v", err)
		return
	}
	info, err := loadBuildInfo(mgr.currentDir)
	if err != nil {
		mgr.Errorf("failed to load build info: %v", err)
		return
	}
	// HEAD might be pointing to a different commit now, e.g. due to a recent failed kernel
	// build attempt, so let's always reset it to the commit the current kernel was built at.
	_, err = mgr.repo.CheckoutCommit(mgr.mgrcfg.Repo, info.KernelCommit)
	if err != nil {
		mgr.Errorf("failed to check out the last kernel commit %q: %v", info.KernelCommit, err)
		return
	}
	buildTag, err := mgr.uploadBuild(info, mgr.currentDir)
	if err != nil {
		mgr.Errorf("failed to upload build: %v", err)
		return
	}
	daysSinceCommit := time.Since(info.KernelCommitDate).Hours() / 24
	if mgr.buildFailed && daysSinceCommit > float64(mgr.mgrcfg.MaxKernelLagDays) {
		log.Logf(0, "%s: the kernel is now too old (%.1f days since last commit), fuzzing is stopped",
			mgr.name, daysSinceCommit)
		return
	}
	cfgFile, err := mgr.writeConfig(buildTag)
	if err != nil {
		mgr.Errorf("failed to create manager config: %v", err)
		return
	}
	bin := filepath.FromSlash("syzkaller/current/bin/syz-manager")
	logFile := filepath.Join(mgr.currentDir, "manager.log")
	benchFile := filepath.Join(mgr.currentDir, benchFileName)

	args := []string{"-config", cfgFile, "-vv", "1", "-bench", benchFile}
	if mgr.debug {
		args = append(args, "-debug")
	}
	mgr.cmd = NewManagerCmd(mgr.name, logFile, benchFile, mgr.Errorf, bin, args...)
	mgr.lastRestarted = time.Now()
}

func (mgr *Manager) testImage(imageDir string, info *BuildInfo) error {
	testSem.Wait()
	defer testSem.Signal()

	log.Logf(0, "%v: testing image...", mgr.name)
	mgrcfg, err := mgr.createTestConfig(imageDir, info)
	if err != nil {
		return fmt.Errorf("failed to create manager config: %w", err)
	}
	rep, err := instance.RunSmokeTest(mgrcfg)
	if err != nil {
		mgr.Errorf("%s", err)
		return err
	} else if rep == nil {
		return nil
	}
	rep.Title = fmt.Sprintf("%v test error: %v", mgr.mgrcfg.RepoAlias, rep.Title)
	// There are usually no duplicates for boot errors, so we reset AltTitles.
	// But if we passed them, we would need to add the same prefix as for Title
	// in order to avoid duping boot bugs with non-boot bugs.
	rep.AltTitles = nil
	if err := mgr.reportBuildError(rep, info, imageDir); err != nil {
		mgr.Errorf("failed to report image error: %v", err)
	}
	return fmt.Errorf("%s", rep.Title)
}

func (mgr *Manager) reportBuildError(rep *report.Report, info *BuildInfo, imageDir string) error {
	if mgr.dash == nil {
		log.Logf(0, "%v: image testing failed: %v\n\n%s\n\n%s",
			mgr.name, rep.Title, rep.Report, rep.Output)
		return nil
	}
	build, err := mgr.createDashboardBuild(info, imageDir, "error")
	if err != nil {
		return err
	}
	if mgr.storage != nil {
		// We have to send assets together with the other info because the report
		// might be generated immediately.
		uploadedAssets, err := mgr.uploadBuildAssets(build, imageDir)
		if err == nil {
			build.Assets = uploadedAssets
		} else {
			log.Logf(0, "%v: failed to upload build assets: %s", mgr.name, err)
		}
	}
	req := &dashapi.BuildErrorReq{
		Build: *build,
		Crash: dashapi.Crash{
			Title:      rep.Title,
			AltTitles:  rep.AltTitles,
			Corrupted:  false, // Otherwise they get merged with other corrupted reports.
			Recipients: rep.Recipients.ToDash(),
			Log:        rep.Output,
			Report:     rep.Report,
		},
	}
	if rep.GuiltyFile != "" {
		req.Crash.GuiltyFiles = []string{rep.GuiltyFile}
	}
	if err := mgr.dash.ReportBuildError(req); err != nil {
		return err
	}
	return nil
}

func (mgr *Manager) createTestConfig(imageDir string, info *BuildInfo) (*mgrconfig.Config, error) {
	mgrcfg := new(mgrconfig.Config)
	*mgrcfg = *mgr.managercfg
	mgrcfg.Name += "-test"
	mgrcfg.Tag = info.KernelCommit
	mgrcfg.HTTP = "" // Don't start the HTTP server.
	// For GCE VMs, we need to bind to a real networking interface, so no localhost.
	mgrcfg.RPC = fmt.Sprintf(":%v", mgr.mgrcfg.testRPCPort)
	mgrcfg.Workdir = filepath.Join(imageDir, "workdir")
	if err := instance.SetConfigImage(mgrcfg, imageDir, true); err != nil {
		return nil, err
	}
	if err := instance.OverrideVMCount(mgrcfg, 3); err != nil {
		return nil, err
	}
	mgrcfg.KernelSrc = mgr.kernelSrcDir
	if err := mgrconfig.Complete(mgrcfg); err != nil {
		return nil, fmt.Errorf("bad manager config: %w", err)
	}
	return mgrcfg, nil
}

func (mgr *Manager) writeConfig(buildTag string) (string, error) {
	mgrcfg := new(mgrconfig.Config)
	*mgrcfg = *mgr.managercfg

	if mgr.dash != nil {
		mgrcfg.DashboardClient = mgr.mgrcfg.DashboardClient
		mgrcfg.DashboardAddr = mgr.cfg.DashboardAddr
		mgrcfg.DashboardKey = mgr.mgrcfg.DashboardKey
		mgrcfg.AssetStorage = mgr.cfg.AssetStorage
	}
	if mgr.cfg.HubAddr != "" {
		mgrcfg.HubClient = mgr.cfg.Name
		mgrcfg.HubAddr = mgr.cfg.HubAddr
		mgrcfg.HubKey = mgr.cfg.HubKey
	}
	mgrcfg.Tag = buildTag
	mgrcfg.Workdir = mgr.workDir
	// There's not much point in keeping disabled progs in the syz-ci corpuses.
	// If the syscalls on some instance are enabled again, syz-hub will provide
	// it with the missing progs over time.
	// And, on the other hand, PreserveCorpus=false lets us disable syscalls in
	// the least destructive way for the rest of the corpus: calls will be cut
	// out of the programs and the leftovers will be retriaged.
	mgrcfg.PreserveCorpus = false
	if err := instance.SetConfigImage(mgrcfg, mgr.currentDir, false); err != nil {
		return "", err
	}
	// Strictly speaking, this is somewhat racy, as the builder can concurrently
	// update the source, or even delete and re-clone it. If this causes
	// problems, we need to make a copy of the sources after the build.
	mgrcfg.KernelSrc = mgr.kernelSrcDir
	if err := mgrconfig.Complete(mgrcfg); err != nil {
		return "", fmt.Errorf("bad manager config: %w", err)
	}
	configFile := filepath.Join(mgr.currentDir, "manager.cfg")
	if err := config.SaveFile(configFile, mgrcfg); err != nil {
		return "", err
	}
	return configFile, nil
}

func (mgr *Manager) uploadBuild(info *BuildInfo, imageDir string) (string, error) {
	if mgr.dash == nil {
		// Dashboard identifies builds by unique tags that are combined
		// from the kernel tag, compiler tag and config tag.
		// This combined tag is meaningless without the dashboard,
		// so we use the kernel tag (commit tag) because it communicates
		// at least some useful information.
		return info.KernelCommit, nil
	}

	build, err := mgr.createDashboardBuild(info, imageDir, "normal")
	if err != nil {
		return "", err
	}
	mgr.lastBuild = build
	commitTitles, fixCommits, err := mgr.pollCommits(info.KernelCommit)
	if err != nil {
		// This is not critical for operation.
		mgr.Errorf("failed to poll commits: %v", err)
	}
	build.Commits = commitTitles
	build.FixCommits = fixCommits
	if mgr.storage != nil {
		// We always upload build assets -- we create a separate Build object not just for
		// different kernel commits, but also for different syzkaller commits, configs, etc.
		// Since we deduplicate assets by hashing, this should not be a problem -- no assets
		// will be actually duplicated, only the records in the DB.
		assets, err := mgr.uploadBuildAssets(build, imageDir)
		if err != nil {
			mgr.Errorf("failed to upload build assets: %v", err)
			return "", err
		}
		build.Assets = assets
	}
	if err := mgr.dash.UploadBuild(build); err != nil {
		return "", err
	}
	return build.ID, nil
}

func (mgr *Manager) createDashboardBuild(info *BuildInfo, imageDir, typ string) (*dashapi.Build, error) {
	var kernelConfig []byte
	if kernelConfigFile := filepath.Join(imageDir, "kernel.config"); osutil.IsExist(kernelConfigFile) {
		var err error
		if kernelConfig, err = os.ReadFile(kernelConfigFile); err != nil {
			return nil, fmt.Errorf("failed to read kernel.config: %w", err)
		}
	}
	// The resulting build depends on both the kernel build tag and the syzkaller commit.
	// Also mix in the build type, so that image error builds are not merged into normal builds.
	var tagData []byte
	tagData = append(tagData, info.Tag...)
	tagData = append(tagData, prog.GitRevisionBase...)
	tagData = append(tagData, typ...)
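	// hash.String collapses the concatenation into a fixed-length ID, so the
	// same (kernel tag, syzkaller revision, type) triple always yields the same Build ID.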
	build := &dashapi.Build{
		Manager:             mgr.name,
		ID:                  hash.String(tagData),
		OS:                  mgr.managercfg.TargetOS,
		Arch:                mgr.managercfg.TargetArch,
		VMArch:              mgr.managercfg.TargetVMArch,
		SyzkallerCommit:     prog.GitRevisionBase,
		SyzkallerCommitDate: prog.GitRevisionDate,
		CompilerID:          info.CompilerID,
		KernelRepo:          info.KernelRepo,
		KernelBranch:        info.KernelBranch,
		KernelCommit:        info.KernelCommit,
		KernelCommitTitle:   info.KernelCommitTitle,
		KernelCommitDate:    info.KernelCommitDate,
		KernelConfig:        kernelConfig,
	}
	return build, nil
}

// pollCommits asks the dashboard what commits it is interested in (i.e. fixes for
// open bugs) and returns the subset of these commits that are present in a build
// on commit buildCommit.
func (mgr *Manager) pollCommits(buildCommit string) ([]string, []dashapi.Commit, error) {
	resp, err := mgr.dash.BuilderPoll(mgr.name)
	if err != nil || len(resp.PendingCommits) == 0 && resp.ReportEmail == "" {
		return nil, nil, err
	}

	// We don't want to spend too much time querying commits from the history,
	// so let's pick a random subset of them each time.
	const sampleCommits = 25

	pendingCommits := resp.PendingCommits
	if len(pendingCommits) > sampleCommits {
		rand.New(rand.NewSource(time.Now().UnixNano())).Shuffle(
			len(pendingCommits), func(i, j int) {
				pendingCommits[i], pendingCommits[j] =
					pendingCommits[j], pendingCommits[i]
			})
		pendingCommits = pendingCommits[:sampleCommits]
	}

	var present []string
	if len(pendingCommits) != 0 {
		commits, _, err := mgr.repo.GetCommitsByTitles(pendingCommits)
		if err != nil {
			return nil, nil, err
		}
		m := make(map[string]bool, len(commits))
		for _, com := range commits {
			m[vcs.CanonicalizeCommit(com.Title)] = true
		}
		for _, com := range pendingCommits {
			if m[vcs.CanonicalizeCommit(com)] {
				present = append(present, com)
			}
		}
	}
	var fixCommits []dashapi.Commit
	if resp.ReportEmail != "" {
		if !brokenRepo(mgr.mgrcfg.Repo) {
			commits, err := mgr.repo.ExtractFixTagsFromCommits(buildCommit, resp.ReportEmail)
			if err != nil {
				return nil, nil, err
			}
			for _, com := range commits {
				fixCommits = append(fixCommits, dashapi.Commit{
					Title:  com.Title,
					BugIDs: com.Tags,
					Date:   com.Date,
				})
			}
		}
	}
	return present, fixCommits, nil
}

func (mgr *Manager) backportCommits() []vcs.BackportCommit {
	return append(
		append([]vcs.BackportCommit{}, mgr.cfg.BisectBackports...),
		mgr.mgrcfg.BisectBackports...,
	)
}

func (mgr *Manager) uploadBuildAssets(buildInfo *dashapi.Build, assetFolder string) ([]dashapi.NewAsset, error) {
	if mgr.storage == nil {
		// No reason to continue anyway.
		return nil, fmt.Errorf("asset storage is not configured")
	}
	type pendingAsset struct {
		path      string
		assetType dashapi.AssetType
		name      string
	}
	pending := []pendingAsset{}
	kernelFile := filepath.Join(assetFolder, "kernel")
	if osutil.IsExist(kernelFile) {
		fileName := "kernel"
		if buildInfo.OS == targets.Linux {
			fileName = path.Base(build.LinuxKernelImage(buildInfo.Arch))
		}
		pending = append(pending, pendingAsset{kernelFile, dashapi.KernelImage, fileName})
	}
	imageFile := filepath.Join(assetFolder, "image")
	if osutil.IsExist(imageFile) {
		if mgr.managercfg.Type == "qemu" {
			// For qemu we currently use non-bootable disk images.
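			// (With the qemu VM type the kernel is typically passed to qemu
			// directly, so the disk image alone is not bootable.)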
			pending = append(pending, pendingAsset{imageFile, dashapi.NonBootableDisk,
				"non_bootable_disk.raw"})
		} else {
			pending = append(pending, pendingAsset{imageFile, dashapi.BootableDisk,
				"disk.raw"})
		}
	}
	target := mgr.managercfg.SysTarget
	kernelObjFile := filepath.Join(assetFolder, "obj", target.KernelObject)
	if osutil.IsExist(kernelObjFile) {
		pending = append(pending,
			pendingAsset{kernelObjFile, dashapi.KernelObject, target.KernelObject})
	}
	// TODO: add initrd?
	ret := []dashapi.NewAsset{}
	for _, pendingAsset := range pending {
		if !mgr.storage.AssetTypeEnabled(pendingAsset.assetType) {
			continue
		}
		file, err := os.Open(pendingAsset.path)
		if err != nil {
			log.Logf(0, "failed to open an asset for uploading: %s, %s",
				pendingAsset.path, err)
			continue
		}
		if mgr.debugStorage {
			log.Logf(0, "uploading an asset %s of type %s",
				pendingAsset.path, pendingAsset.assetType)
		}
		extra := &asset.ExtraUploadArg{SkipIfExists: true}
		hash := sha256.New()
		if _, err := io.Copy(hash, file); err != nil {
			log.Logf(0, "failed to calculate hash for the asset %s: %s", pendingAsset.path, err)
			continue
		}
		extra.UniqueTag = fmt.Sprintf("%x", hash.Sum(nil))
		// Now we need to go back to the beginning of the file again.
		if _, err := file.Seek(0, io.SeekStart); err != nil {
			log.Logf(0, "failed to wind back the opened file for %s: %s", pendingAsset.path, err)
			continue
		}
		info, err := mgr.storage.UploadBuildAsset(file, pendingAsset.name,
			pendingAsset.assetType, buildInfo, extra)
		if err != nil {
			log.Logf(0, "failed to upload an asset: %s, %s",
				pendingAsset.path, err)
			continue
		} else if mgr.debugStorage {
			log.Logf(0, "uploaded an asset: %#v", info)
		}
		ret = append(ret, info)
	}
	return ret, nil
}

func (mgr *Manager) httpGET(ctx context.Context, path string) (resp *http.Response, err error) {
	addr := mgr.managercfg.HTTP
	if addr != "" && addr[0] == ':' {
		addr = "127.0.0.1" + addr // in case addr is ":port"
	}
	client := &http.Client{
		Timeout: time.Hour,
	}
	req, err := http.NewRequestWithContext(ctx, "GET", fmt.Sprintf("http://%s%s", addr, path), nil)
	if err != nil {
		return nil, err
	}
	return client.Do(req)
}

func (mgr *Manager) uploadCoverReport(ctx context.Context) error {
	directUpload := mgr.managercfg.Cover && mgr.cfg.CoverUploadPath != ""
	if mgr.storage == nil && !directUpload {
		// Cover report uploading is disabled.
		return nil
	}
	if mgr.storage != nil && directUpload {
		return fmt.Errorf("cover report must be uploaded either directly or via asset storage, not both")
	}
	// Report generation can consume lots of memory. Generate one at a time.
	select {
	case <-buildSem.WaitC():
	case <-ctx.Done():
		return nil
	}
	defer buildSem.Signal()

	resp, err := mgr.httpGET(context.Background(), "/cover")
	if err != nil {
		return fmt.Errorf("failed to get report: %w", err)
	}
	defer resp.Body.Close()
	if directUpload {
		return uploadFile(context.Background(), nil, mgr.cfg.CoverUploadPath, mgr.name+".html", resp.Body, mgr.cfg.PublishGCS)
	}
	// Upload via the asset storage.
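	// Unlike the build assets above, no ExtraUploadArg is passed: the report
	// contents change between uploads, so there is nothing to deduplicate.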
	newAsset, err := mgr.storage.UploadBuildAsset(resp.Body, mgr.name+".html",
		dashapi.HTMLCoverageReport, mgr.lastBuild, nil)
	if err != nil {
		return fmt.Errorf("failed to upload html coverage report: %w", err)
	}
	err = mgr.storage.ReportBuildAssets(mgr.lastBuild, newAsset)
	if err != nil {
		return fmt.Errorf("failed to report the html coverage report asset: %w", err)
	}
	return nil
}

type uploadOptions struct {
	nameSuffix string
	publish    bool
	compress   bool
}

func (mgr *Manager) uploadCoverJSONLToGCS(ctx context.Context, gcsClient gcs.Client, mgrSrc, gcsDest string,
	opts uploadOptions, f func(io.Writer, *json.Decoder) error) error {
	if !mgr.managercfg.Cover || gcsDest == "" {
		return nil
	}

	// Report generation consumes 40G of RAM. Generate one at a time.
	// TODO: remove this once #4585 (symbolization tuning) is closed.
	select {
	case <-buildSem.WaitC():
	case <-ctx.Done():
		return nil
	}
	defer buildSem.Signal()

	eg, egCtx := errgroup.WithContext(ctx)
	resp, err := mgr.httpGET(egCtx, mgrSrc)
	if err != nil {
		return fmt.Errorf("failed to httpGET %s: %w", mgrSrc, err)
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		sb := new(strings.Builder)
		io.Copy(sb, resp.Body)
		return fmt.Errorf("failed to GET %s, httpStatus %d: %s",
			mgrSrc, resp.StatusCode, sb.String())
	}

	pr, pw := io.Pipe()
	eg.Go(func() error {
		defer pw.Close()
		var w io.Writer
		w = pw
		if opts.compress {
			gzw := gzip.NewWriter(pw)
			defer gzw.Close()
			w = gzw
		}
		decoder := json.NewDecoder(resp.Body)
		for decoder.More() {
			if err := f(w, decoder); err != nil {
				return fmt.Errorf("callback: %w", err)
			}
		}
		return nil
	})
	eg.Go(func() error {
		defer pr.Close()
		fileName := fmt.Sprintf("%s/%s%s.jsonl", mgr.mgrcfg.DashboardClient, mgr.name, opts.nameSuffix)
		if err := uploadFile(egCtx, gcsClient, gcsDest, fileName, pr, opts.publish); err != nil {
			return fmt.Errorf("uploadFile: %w", err)
		}
		return nil
	})
	return eg.Wait()
}

func (mgr *Manager) uploadCoverStat(ctx context.Context, fuzzingMinutes int) error {
	// Coverage report generation consumes and caches lots of memory.
	// In the syz-ci context, report generation won't be used after this point,
	// so tell the manager to flush the report generator.
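	// curTime is captured once so that all JSONL lines produced by this upload
	// carry the same timestamp.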
	curTime := time.Now()
	if err := mgr.uploadCoverJSONLToGCS(ctx, nil,
		"/cover?jsonl=1&flush=1",
		mgr.cfg.CoverPipelinePath,
		uploadOptions{
			nameSuffix: time.Now().Format("-2006-01-02-15-04"),
			publish:    false,
			compress:   false,
		},
		func(w io.Writer, dec *json.Decoder) error {
			var covInfo cover.CoverageInfo
			if err := dec.Decode(&covInfo); err != nil {
				return fmt.Errorf("failed to decode CoverageInfo: %w", err)
			}
			if err := cover.WriteCIJSONLine(w, covInfo, cover.CIDetails{
				Version:        1,
				Timestamp:      curTime.Format(time.RFC3339Nano),
				FuzzingMinutes: fuzzingMinutes,
				Arch:           mgr.lastBuild.Arch,
				BuildID:        mgr.lastBuild.ID,
				Manager:        mgr.name,
				KernelRepo:     mgr.lastBuild.KernelRepo,
				KernelBranch:   mgr.lastBuild.KernelBranch,
				KernelCommit:   mgr.lastBuild.KernelCommit,
			}); err != nil {
				return fmt.Errorf("failed to write CIJSONLine: %w", err)
			}
			return nil
		}); err != nil {
		return fmt.Errorf("mgr.uploadCoverJSONLToGCS: %w", err)
	}
	return nil
}

func (mgr *Manager) uploadProgramsWithCoverage(ctx context.Context) error {
	if err := mgr.uploadCoverJSONLToGCS(ctx, nil,
		"/coverprogs?jsonl=1",
		mgr.cfg.CoverProgramsPath,
		uploadOptions{
			nameSuffix: "",
			publish:    mgr.cfg.PublishGCS,
			compress:   true,
		},
		func(w io.Writer, dec *json.Decoder) error {
			var programCoverage cover.ProgramCoverage
			if err := dec.Decode(&programCoverage); err != nil {
				return fmt.Errorf("cover.ProgramCoverage: %w", err)
			}
			programCoverage.Repo = mgr.lastBuild.KernelRepo
			programCoverage.Commit = mgr.lastBuild.KernelCommit
			if err := cover.WriteJSLine(w, &programCoverage); err != nil {
				return fmt.Errorf("cover.WriteJSLine: %w", err)
			}
			return nil
		}); err != nil {
		return fmt.Errorf("mgr.uploadCoverJSONLToGCS: %w", err)
	}
	return nil
}

func (mgr *Manager) uploadCorpus() error {
	if mgr.cfg.CorpusUploadPath == "" {
		return nil
	}
	f, err := os.Open(filepath.Join(mgr.workDir, "corpus.db"))
	if err != nil {
		return err
	}
	defer f.Close()
	return uploadFile(context.Background(), nil, mgr.cfg.CorpusUploadPath, mgr.name+"-corpus.db", f, mgr.cfg.PublishGCS)
}

func (mgr *Manager) uploadBenchData(ctx context.Context) error {
	if mgr.lastRestarted.IsZero() {
		return nil
	}
	const minUptime = 30 * time.Minute
	if time.Since(mgr.lastRestarted) < minUptime {
		// Let's guard against uploading too many benches in case the instance constantly
		// restarts.
		return nil
	}
	f, err := os.Open(filepath.Join(mgr.currentDir, benchFileName))
	if err != nil {
		return fmt.Errorf("failed to open bench file: %w", err)
	}
	defer f.Close()
	err = uploadFile(ctx, nil, mgr.cfg.BenchUploadPath+"/"+mgr.name,
		mgr.lastRestarted.Format("2006-01-02_15h.json"), f, false)
	if err != nil {
		return fmt.Errorf("failed to upload the bench file: %w", err)
	}
	return nil
}

func uploadFile(ctx context.Context, gcsClient gcs.Client, dstPath, name string, file io.Reader, publish bool) error {
	URL, err := url.Parse(dstPath)
	if err != nil {
		return fmt.Errorf("failed to parse upload path: %w", err)
	}
	URL.Path = path.Join(URL.Path, name)
	URLStr := URL.String()
	log.Logf(0, "uploading %v to %v", name, URLStr)
	if strings.HasPrefix(URLStr, "http://") ||
		strings.HasPrefix(URLStr, "https://") {
		if gcsClient != nil {
			return fmt.Errorf("gcsClient is expected to be nil for http* requests")
		}
		return uploadFileHTTPPut(ctx, URLStr, file)
	}
	return gcs.UploadFile(ctx, file, URLStr, gcs.UploadOptions{Publish: publish, GCSClientMock: gcsClient})
}

func uploadFileHTTPPut(ctx context.Context, URL string, file io.Reader) error {
	req, err := http.NewRequestWithContext(ctx, http.MethodPut, URL, file)
	if err != nil {
		return fmt.Errorf("failed to create HTTP PUT request: %w", err)
	}
	client := &http.Client{}
	resp, err := client.Do(req)
	if err != nil {
		return fmt.Errorf("failed to perform HTTP PUT request: %w", err)
	}
	defer resp.Body.Close()
	if resp.StatusCode < 200 || resp.StatusCode > 299 {
		return fmt.Errorf("HTTP PUT failed with status code: %v", resp.StatusCode)
	}
	return nil
}

// Errorf logs a non-fatal error and sends it to the dashboard.
func (mgr *Manager) Errorf(msg string, args ...interface{}) {
	for _, arg := range args {
		err, _ := arg.(error)
		if err == nil {
			continue
		}
		if errors.Is(err, context.Canceled) {
			// Context cancellation-related errors only create unnecessary noise.
			return
		}
	}
	log.Errorf(mgr.name+": "+msg, args...)
	if mgr.dash != nil {
		mgr.dash.LogError(mgr.name, msg, args...)
	}
}

func (mgr *ManagerConfig) validate(cfg *Config) error {
	// The manager name must not contain dots because it is used as a GCE image name prefix.
	managerNameRe := regexp.MustCompile("^[a-zA-Z0-9-_]{3,64}$")
	if !managerNameRe.MatchString(mgr.Name) {
		return fmt.Errorf("param 'managers.name' has bad value: %q", mgr.Name)
	}
	if mgr.Jobs.AnyEnabled() && (cfg.DashboardAddr == "" || cfg.DashboardClient == "") {
		return fmt.Errorf("manager %v: has jobs but no dashboard info", mgr.Name)
	}
	if mgr.Jobs.PollCommits && (cfg.DashboardAddr == "" || mgr.DashboardClient == "") {
		return fmt.Errorf("manager %v: commit_poll is set but no dashboard info", mgr.Name)
	}
	if (mgr.Jobs.BisectCause || mgr.Jobs.BisectFix) && cfg.BisectBinDir == "" {
		return fmt.Errorf("manager %v: enabled bisection but no bisect_bin_dir", mgr.Name)
	}
	return nil
}