github.com/google/syzkaller@v0.0.0-20240517125934-c0f1611a36d6/tools/syz-testbed/testbed.go (about) 1 // Copyright 2021 syzkaller project authors. All rights reserved. 2 // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 3 4 // syz-testbed automatically checks out, builds and sets up a number of syzkaller instances. 5 // This might be very helpful e.g. when gauging the effect of new changes on the total syzkaller 6 // performance. 7 // For details see docs/syz_testbed.md. 8 9 package main 10 11 import ( 12 "encoding/json" 13 "flag" 14 "fmt" 15 "log" 16 "path/filepath" 17 "regexp" 18 "sync" 19 "time" 20 21 "github.com/google/syzkaller/pkg/config" 22 "github.com/google/syzkaller/pkg/osutil" 23 "github.com/google/syzkaller/pkg/tool" 24 "github.com/google/syzkaller/pkg/vcs" 25 ) 26 27 var ( 28 flagConfig = flag.String("config", "", "config file") 29 ) 30 31 type TestbedConfig struct { 32 Name string `json:"name"` // name of the testbed 33 Target string `json:"target"` // what application to test 34 MaxInstances int `json:"max_instances"` // max # of simultaneously running instances 35 RunTime DurationConfig `json:"run_time"` // lifetime of an instance (default "24h") 36 HTTP string `json:"http"` // on which port to set up a simple web dashboard 37 BenchCmp string `json:"benchcmp"` // path to the syz-benchcmp executable 38 Corpus string `json:"corpus"` // path to the corpus file 39 Workdir string `json:"workdir"` // instances will be checked out there 40 ReproConfig ReproTestConfig `json:"repro_config"` // syz-repro benchmarking config 41 ManagerConfig json.RawMessage `json:"manager_config"` // base manager config 42 Checkouts []CheckoutConfig `json:"checkouts"` 43 } 44 45 type DurationConfig struct { 46 time.Duration 47 } 48 49 type CheckoutConfig struct { 50 Name string `json:"name"` 51 Repo string `json:"repo"` 52 Branch string `json:"branch"` 53 ManagerConfig json.RawMessage `json:"manager_config"` // a patch to manager config 54 } 55 56 type ReproTestConfig struct { 57 InputLogs string `json:"input_logs"` // take crash logs from a folder 58 InputWorkdir string `json:"input_workdir"` // take crash logs from a syzkaller's workdir 59 CrashesPerBug int `json:"crashes_per_bug"` // how many crashes must be taken from each bug 60 SkipBugs []string `json:"skip_bugs"` // crashes to exclude from the workdir, list of regexps 61 } 62 63 type TestbedContext struct { 64 Config *TestbedConfig 65 Checkouts []*Checkout 66 NextCheckoutID int 67 NextInstanceID int 68 Target TestbedTarget 69 mu sync.Mutex 70 } 71 72 func main() { 73 flag.Parse() 74 cfg := &TestbedConfig{ 75 Name: "testbed", 76 Target: "syz-manager", 77 RunTime: DurationConfig{24 * time.Hour}, 78 ReproConfig: ReproTestConfig{ 79 CrashesPerBug: 1, 80 }, 81 } 82 err := config.LoadFile(*flagConfig, &cfg) 83 if err != nil { 84 tool.Failf("failed to read config: %s", err) 85 } 86 87 err = checkConfig(cfg) 88 if err != nil { 89 tool.Failf("invalid config: %s", err) 90 } 91 ctx := TestbedContext{ 92 Config: cfg, 93 Target: targetConstructors[cfg.Target](cfg), 94 } 95 go ctx.setupHTTPServer() 96 97 for _, checkoutCfg := range cfg.Checkouts { 98 mgrCfg := ctx.MakeMgrConfig(cfg.ManagerConfig, checkoutCfg.ManagerConfig) 99 co, err := ctx.NewCheckout(&checkoutCfg, mgrCfg) 100 if err != nil { 101 tool.Failf("checkout failed: %s", err) 102 } 103 ctx.Checkouts = append(ctx.Checkouts, co) 104 } 105 106 shutdown := make(chan struct{}) 107 osutil.HandleInterrupts(shutdown) 108 109 go func() { 110 const period = 90 * time.Second 111 for { 112 time.Sleep(period) 113 err := ctx.SaveStats() 114 if err != nil { 115 log.Printf("stats saving error: %s", err) 116 } 117 } 118 }() 119 120 ctx.Loop(shutdown) 121 } 122 123 func (ctx *TestbedContext) MakeMgrConfig(base, patch json.RawMessage) json.RawMessage { 124 mgrCfg, err := config.MergeJSONs(base, patch) 125 if err != nil { 126 tool.Failf("failed to apply a patch to the base manager config: %s", err) 127 } 128 // We don't care much about the specific ports of syz-managers. 129 mgrCfg, err = config.PatchJSON(mgrCfg, map[string]interface{}{"HTTP": ":0"}) 130 if err != nil { 131 tool.Failf("failed to assign empty HTTP value: %s", err) 132 } 133 return mgrCfg 134 } 135 136 func (ctx *TestbedContext) GetStatViews() ([]StatView, error) { 137 groupsCompleted := []RunResultGroup{} 138 groupsAll := []RunResultGroup{} 139 for _, checkout := range ctx.Checkouts { 140 running := checkout.GetRunningResults() 141 completed := checkout.GetCompletedResults() 142 groupsCompleted = append(groupsCompleted, RunResultGroup{ 143 Name: checkout.Name, 144 Results: completed, 145 }) 146 groupsAll = append(groupsAll, RunResultGroup{ 147 Name: checkout.Name, 148 Results: append(completed, running...), 149 }) 150 } 151 return []StatView{ 152 { 153 Name: "completed", 154 Groups: groupsCompleted, 155 }, 156 { 157 Name: "all", 158 Groups: groupsAll, 159 }, 160 }, nil 161 } 162 163 func (ctx *TestbedContext) TestbedStatsTable() *Table { 164 table := NewTable("Checkout", "Running", "Completed", "Last started") 165 for _, checkout := range ctx.Checkouts { 166 checkout.mu.Lock() 167 last := "" 168 if !checkout.LastRunning.IsZero() { 169 last = time.Since(checkout.LastRunning).Round(time.Second).String() 170 } 171 table.AddRow(checkout.Name, 172 fmt.Sprintf("%d", len(checkout.Running)), 173 fmt.Sprintf("%d", len(checkout.Completed)), 174 last, 175 ) 176 checkout.mu.Unlock() 177 } 178 return table 179 } 180 181 func (ctx *TestbedContext) SaveStats() error { 182 // Preventing concurrent saving of the stats. 183 ctx.mu.Lock() 184 defer ctx.mu.Unlock() 185 views, err := ctx.GetStatViews() 186 if err != nil { 187 return err 188 } 189 for _, view := range views { 190 dir := filepath.Join(ctx.Config.Workdir, "stats_"+view.Name) 191 err := ctx.Target.SaveStatView(view, dir) 192 if err != nil { 193 return err 194 } 195 } 196 table := ctx.TestbedStatsTable() 197 return table.SaveAsCsv(filepath.Join(ctx.Config.Workdir, "testbed.csv")) 198 } 199 200 func (ctx *TestbedContext) Slot(slotID int, stop chan struct{}, ret chan error) { 201 // It seems that even gracefully finished syz-managers can leak GCE instances. 202 // To allow for that strange behavior, let's reuse syz-manager names in each slot, 203 // so that its VMs will in turn reuse the names of the leaked ones. 204 slotName := fmt.Sprintf("%s-%d", ctx.Config.Name, slotID) 205 for { 206 checkout, instance, err := ctx.Target.NewJob(slotName, ctx.Checkouts) 207 if err != nil { 208 ret <- fmt.Errorf("failed to create instance: %w", err) 209 return 210 } 211 checkout.AddRunning(instance) 212 retChannel := make(chan error) 213 go func() { 214 retChannel <- instance.Run() 215 }() 216 217 var retErr error 218 select { 219 case <-stop: 220 instance.Stop() 221 <-retChannel 222 retErr = fmt.Errorf("instance was killed") 223 case retErr = <-retChannel: 224 } 225 226 // For now, we only archive instances that finished normally (ret == nil). 227 // syz-testbed will anyway stop after such an error, so it's not a problem 228 // that they remain in Running. 229 if retErr != nil { 230 ret <- retErr 231 return 232 } 233 err = checkout.ArchiveInstance(instance) 234 if err != nil { 235 ret <- fmt.Errorf("a call to ArchiveInstance failed: %w", err) 236 return 237 } 238 } 239 } 240 241 // Create instances, run them, stop them, archive them, and so on... 242 func (ctx *TestbedContext) Loop(stop chan struct{}) { 243 stopAll := make(chan struct{}) 244 errors := make(chan error) 245 for i := 0; i < ctx.Config.MaxInstances; i++ { 246 go ctx.Slot(i, stopAll, errors) 247 } 248 249 exited := 0 250 select { 251 case <-stop: 252 log.Printf("stopping the experiment") 253 case err := <-errors: 254 exited = 1 255 log.Printf("an instance has failed (%s), stopping everything", err) 256 } 257 close(stopAll) 258 for ; exited < ctx.Config.MaxInstances; exited++ { 259 <-errors 260 } 261 } 262 263 func (d *DurationConfig) UnmarshalJSON(data []byte) error { 264 var v interface{} 265 if err := json.Unmarshal(data, &v); err != nil { 266 return err 267 } 268 str, ok := v.(string) 269 if !ok { 270 return fmt.Errorf("%s was expected to be a string", data) 271 } 272 parsed, err := time.ParseDuration(str) 273 if err == nil { 274 d.Duration = parsed 275 } 276 return err 277 } 278 279 func (d *DurationConfig) MarshalJSON() ([]byte, error) { 280 return json.Marshal(d.String()) 281 } 282 283 func checkReproTestConfig(cfg *ReproTestConfig) error { 284 if cfg.InputLogs != "" && !osutil.IsExist(cfg.InputLogs) { 285 return fmt.Errorf("input_log folder does not exist: %v", cfg.InputLogs) 286 } 287 if cfg.InputWorkdir != "" && !osutil.IsExist(cfg.InputWorkdir) { 288 return fmt.Errorf("input_workdir folder does not exist: %v", cfg.InputWorkdir) 289 } 290 if cfg.CrashesPerBug < 1 { 291 return fmt.Errorf("crashes_per_bug cannot be less than 1: %d", cfg.CrashesPerBug) 292 } 293 return nil 294 } 295 296 func checkConfig(cfg *TestbedConfig) error { 297 testbedNameRe := regexp.MustCompile(`^[0-9a-z\-]{1,20}$`) 298 if !testbedNameRe.MatchString(cfg.Name) { 299 return fmt.Errorf("invalid testbed name: %v", cfg.Name) 300 } 301 if cfg.Workdir == "" { 302 return fmt.Errorf("workdir is empty") 303 } 304 cfg.Workdir = osutil.Abs(cfg.Workdir) 305 err := osutil.MkdirAll(cfg.Workdir) 306 if err != nil { 307 return err 308 } 309 if cfg.Corpus != "" && !osutil.IsExist(cfg.Corpus) { 310 return fmt.Errorf("corpus %v does not exist", cfg.Corpus) 311 } 312 if cfg.MaxInstances < 1 { 313 return fmt.Errorf("max_instances cannot be less than 1") 314 } 315 if cfg.BenchCmp != "" && !osutil.IsExist(cfg.BenchCmp) { 316 return fmt.Errorf("benchmp path is specified, but %s does not exist", cfg.BenchCmp) 317 } 318 if _, ok := targetConstructors[cfg.Target]; !ok { 319 return fmt.Errorf("unknown target %s", cfg.Target) 320 } 321 if err = checkReproTestConfig(&cfg.ReproConfig); err != nil { 322 return err 323 } 324 cfg.Corpus = osutil.Abs(cfg.Corpus) 325 names := make(map[string]bool) 326 for idx := range cfg.Checkouts { 327 co := &cfg.Checkouts[idx] 328 if !vcs.CheckRepoAddress(co.Repo) { 329 return fmt.Errorf("invalid repo: %s", co.Repo) 330 } 331 if co.Branch == "" { 332 co.Branch = "master" 333 } else if !vcs.CheckBranch(co.Branch) { 334 return fmt.Errorf("invalid branch: %s", co.Branch) 335 } 336 if names[co.Name] { 337 return fmt.Errorf("duplicate checkout name: %v", co.Name) 338 } 339 names[co.Name] = true 340 } 341 return nil 342 }