github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/tools/syz-testbed/testbed.go (about)

     1  // Copyright 2021 syzkaller project authors. All rights reserved.
     2  // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
     3  
     4  // syz-testbed automatically checks out, builds and sets up a number of syzkaller instances.
     5  // This might be very helpful e.g. when gauging the effect of new changes on the total syzkaller
     6  // performance.
     7  // For details see docs/syz_testbed.md.
     8  
     9  package main
    10  
    11  import (
    12  	"encoding/json"
    13  	"flag"
    14  	"fmt"
    15  	"log"
    16  	"os/exec"
    17  	"path/filepath"
    18  	"regexp"
    19  	"sync"
    20  	"time"
    21  
    22  	"github.com/google/syzkaller/pkg/config"
    23  	"github.com/google/syzkaller/pkg/osutil"
    24  	"github.com/google/syzkaller/pkg/tool"
    25  	"github.com/google/syzkaller/pkg/vcs"
    26  )
    27  
    28  var (
    29  	flagConfig = flag.String("config", "", "config file")
    30  )
    31  
// TestbedConfig is the top-level syz-testbed configuration, deserialized from
// the JSON file passed via the -config flag. Defaults are set in main() before
// the file is loaded, so every field here is optional unless checkConfig()
// says otherwise.
type TestbedConfig struct {
	Name          string           `json:"name"`           // name of the testbed
	Target        string           `json:"target"`         // what application to test
	MaxInstances  int              `json:"max_instances"`  // max # of simultaneously running instances
	RunTime       DurationConfig   `json:"run_time"`       // lifetime of an instance (default "24h")
	HTTP          string           `json:"http"`           // on which port to set up a simple web dashboard
	BenchCmp      string           `json:"benchcmp"`       // path to the syz-benchcmp executable
	Corpus        string           `json:"corpus"`         // path to the corpus file
	Workdir       string           `json:"workdir"`        // instances will be checked out there
	ReproConfig   ReproTestConfig  `json:"repro_config"`   // syz-repro benchmarking config
	ManagerConfig json.RawMessage  `json:"manager_config"` // base manager config
	ManagerMode   string           `json:"manager_mode"`   // manager mode flag
	Checkouts     []CheckoutConfig `json:"checkouts"`      // syzkaller checkouts to compare
}
    46  
// DurationConfig embeds time.Duration so that durations can be written as
// human-readable strings (e.g. "24h") in the JSON config; see the
// UnmarshalJSON/MarshalJSON methods below.
type DurationConfig struct {
	time.Duration
}
    50  
// CheckoutConfig describes one syzkaller checkout to be built and tested.
type CheckoutConfig struct {
	Name          string          `json:"name"`
	Repo          string          `json:"repo"`
	Branch        string          `json:"branch"`         // defaults to "master" (see checkConfig)
	ManagerConfig json.RawMessage `json:"manager_config"` // a patch to manager config
}
    57  
// ReproTestConfig configures syz-repro benchmarking: where crash logs are
// taken from and how many crashes per bug are reproduced.
type ReproTestConfig struct {
	InputLogs     string   `json:"input_logs"`      // take crash logs from a folder
	InputWorkdir  string   `json:"input_workdir"`   // take crash logs from a syzkaller's workdir
	CrashesPerBug int      `json:"crashes_per_bug"` // how many crashes must be taken from each bug
	SkipBugs      []string `json:"skip_bugs"`       // crashes to exclude from the workdir, list of regexps
}
    64  
// TestbedContext holds the runtime state of the whole testbed experiment:
// the parsed config, all created checkouts and the target being tested.
type TestbedContext struct {
	Config         *TestbedConfig
	Checkouts      []*Checkout
	NextCheckoutID int
	NextInstanceID int
	Target         TestbedTarget
	mu             sync.Mutex // serializes SaveStats (see below)
}
    73  
    74  func main() {
    75  	flag.Parse()
    76  	benchcmp, _ := exec.LookPath("syz-benchcmp")
    77  	cfg := &TestbedConfig{
    78  		Name:     "testbed",
    79  		Target:   "syz-manager",
    80  		BenchCmp: benchcmp,
    81  		RunTime:  DurationConfig{24 * time.Hour},
    82  		ReproConfig: ReproTestConfig{
    83  			CrashesPerBug: 1,
    84  		},
    85  		ManagerMode: "fuzzing",
    86  	}
    87  	err := config.LoadFile(*flagConfig, &cfg)
    88  	if err != nil {
    89  		tool.Failf("failed to read config: %s", err)
    90  	}
    91  
    92  	err = checkConfig(cfg)
    93  	if err != nil {
    94  		tool.Failf("invalid config: %s", err)
    95  	}
    96  	ctx := TestbedContext{
    97  		Config: cfg,
    98  		Target: targetConstructors[cfg.Target](cfg),
    99  	}
   100  	go ctx.setupHTTPServer()
   101  
   102  	for _, checkoutCfg := range cfg.Checkouts {
   103  		mgrCfg := ctx.MakeMgrConfig(cfg.ManagerConfig, checkoutCfg.ManagerConfig)
   104  		co, err := ctx.NewCheckout(&checkoutCfg, mgrCfg)
   105  		if err != nil {
   106  			tool.Failf("checkout failed: %s", err)
   107  		}
   108  		ctx.Checkouts = append(ctx.Checkouts, co)
   109  	}
   110  
   111  	shutdown := make(chan struct{})
   112  	osutil.HandleInterrupts(shutdown)
   113  
   114  	go func() {
   115  		const period = 90 * time.Second
   116  		for {
   117  			time.Sleep(period)
   118  			err := ctx.SaveStats()
   119  			if err != nil {
   120  				log.Printf("stats saving error: %s", err)
   121  			}
   122  		}
   123  	}()
   124  
   125  	ctx.Loop(shutdown)
   126  }
   127  
   128  func (ctx *TestbedContext) MakeMgrConfig(base, patch json.RawMessage) json.RawMessage {
   129  	mgrCfg, err := config.MergeJSONs(base, patch)
   130  	if err != nil {
   131  		tool.Failf("failed to apply a patch to the base manager config: %s", err)
   132  	}
   133  	// We don't care much about the specific ports of syz-managers.
   134  	mgrCfg, err = config.PatchJSON(mgrCfg, map[string]interface{}{"HTTP": ":0"})
   135  	if err != nil {
   136  		tool.Failf("failed to assign empty HTTP value: %s", err)
   137  	}
   138  	return mgrCfg
   139  }
   140  
   141  func (ctx *TestbedContext) GetStatViews() ([]StatView, error) {
   142  	groupsCompleted := []RunResultGroup{}
   143  	groupsAll := []RunResultGroup{}
   144  	for _, checkout := range ctx.Checkouts {
   145  		running := checkout.GetRunningResults()
   146  		completed := checkout.GetCompletedResults()
   147  		groupsCompleted = append(groupsCompleted, RunResultGroup{
   148  			Name:    checkout.Name,
   149  			Results: completed,
   150  		})
   151  		groupsAll = append(groupsAll, RunResultGroup{
   152  			Name:    checkout.Name,
   153  			Results: append(completed, running...),
   154  		})
   155  	}
   156  	return []StatView{
   157  		{
   158  			Name:   "completed",
   159  			Groups: groupsCompleted,
   160  		},
   161  		{
   162  			Name:   "all",
   163  			Groups: groupsAll,
   164  		},
   165  	}, nil
   166  }
   167  
   168  func (ctx *TestbedContext) TestbedStatsTable() *Table {
   169  	table := NewTable("Checkout", "Running", "Completed", "Last started")
   170  	for _, checkout := range ctx.Checkouts {
   171  		checkout.mu.Lock()
   172  		last := ""
   173  		if !checkout.LastRunning.IsZero() {
   174  			last = time.Since(checkout.LastRunning).Round(time.Second).String()
   175  		}
   176  		table.AddRow(checkout.Name,
   177  			fmt.Sprintf("%d", len(checkout.Running)),
   178  			fmt.Sprintf("%d", len(checkout.Completed)),
   179  			last,
   180  		)
   181  		checkout.mu.Unlock()
   182  	}
   183  	return table
   184  }
   185  
   186  func (ctx *TestbedContext) SaveStats() error {
   187  	// Preventing concurrent saving of the stats.
   188  	ctx.mu.Lock()
   189  	defer ctx.mu.Unlock()
   190  	views, err := ctx.GetStatViews()
   191  	if err != nil {
   192  		return err
   193  	}
   194  	for _, view := range views {
   195  		dir := filepath.Join(ctx.Config.Workdir, "stats_"+view.Name)
   196  		err := ctx.Target.SaveStatView(&view, dir)
   197  		if err != nil {
   198  			return err
   199  		}
   200  	}
   201  	table := ctx.TestbedStatsTable()
   202  	return table.SaveAsCsv(filepath.Join(ctx.Config.Workdir, "testbed.csv"))
   203  }
   204  
// Slot keeps exactly one syzkaller instance alive at a time: it repeatedly
// creates a job, runs it to completion (or until stop is closed), archives
// the results and starts over. The first error is sent to ret and terminates
// the slot.
func (ctx *TestbedContext) Slot(slotID int, stop chan struct{}, ret chan error) {
	// It seems that even gracefully finished syz-managers can leak GCE instances.
	// To allow for that strange behavior, let's reuse syz-manager names in each slot,
	// so that its VMs will in turn reuse the names of the leaked ones.
	slotName := fmt.Sprintf("%s-%d", ctx.Config.Name, slotID)
	for {
		checkout, instance, err := ctx.Target.NewJob(slotName, ctx.Checkouts)
		if err != nil {
			ret <- fmt.Errorf("failed to create instance: %w", err)
			return
		}
		checkout.AddRunning(instance)
		// Run the instance in its own goroutine so we can also react to the
		// stop signal while it's executing.
		retChannel := make(chan error)
		go func() {
			retChannel <- instance.Run()
		}()

		var retErr error
		select {
		case <-stop:
			instance.Stop()
			<-retChannel // wait for Run() to actually return before moving on
			retErr = fmt.Errorf("instance was killed")
		case retErr = <-retChannel:
		}

		// For now, we only archive instances that finished normally (ret == nil).
		// syz-testbed will anyway stop after such an error, so it's not a problem
		// that they remain in Running.
		if retErr != nil {
			ret <- retErr
			return
		}
		err = checkout.ArchiveInstance(instance)
		if err != nil {
			ret <- fmt.Errorf("a call to ArchiveInstance failed: %w", err)
			return
		}
	}
}
   245  
   246  // Create instances, run them, stop them, archive them, and so on...
   247  func (ctx *TestbedContext) Loop(stop chan struct{}) {
   248  	stopAll := make(chan struct{})
   249  	errors := make(chan error)
   250  	for i := 0; i < ctx.Config.MaxInstances; i++ {
   251  		go ctx.Slot(i, stopAll, errors)
   252  	}
   253  
   254  	exited := 0
   255  	select {
   256  	case <-stop:
   257  		log.Printf("stopping the experiment")
   258  	case err := <-errors:
   259  		exited = 1
   260  		log.Printf("an instance has failed (%s), stopping everything", err)
   261  	}
   262  	close(stopAll)
   263  	for ; exited < ctx.Config.MaxInstances; exited++ {
   264  		<-errors
   265  	}
   266  }
   267  
   268  func (d *DurationConfig) UnmarshalJSON(data []byte) error {
   269  	var v interface{}
   270  	if err := json.Unmarshal(data, &v); err != nil {
   271  		return err
   272  	}
   273  	str, ok := v.(string)
   274  	if !ok {
   275  		return fmt.Errorf("%s was expected to be a string", data)
   276  	}
   277  	parsed, err := time.ParseDuration(str)
   278  	if err == nil {
   279  		d.Duration = parsed
   280  	}
   281  	return err
   282  }
   283  
   284  func (d *DurationConfig) MarshalJSON() ([]byte, error) {
   285  	return json.Marshal(d.String())
   286  }
   287  
   288  func checkReproTestConfig(cfg *ReproTestConfig) error {
   289  	if cfg.InputLogs != "" && !osutil.IsExist(cfg.InputLogs) {
   290  		return fmt.Errorf("input_log folder does not exist: %v", cfg.InputLogs)
   291  	}
   292  	if cfg.InputWorkdir != "" && !osutil.IsExist(cfg.InputWorkdir) {
   293  		return fmt.Errorf("input_workdir folder does not exist: %v", cfg.InputWorkdir)
   294  	}
   295  	if cfg.CrashesPerBug < 1 {
   296  		return fmt.Errorf("crashes_per_bug cannot be less than 1: %d", cfg.CrashesPerBug)
   297  	}
   298  	return nil
   299  }
   300  
   301  func checkConfig(cfg *TestbedConfig) error {
   302  	testbedNameRe := regexp.MustCompile(`^[0-9a-z\-]{1,20}$`)
   303  	if !testbedNameRe.MatchString(cfg.Name) {
   304  		return fmt.Errorf("invalid testbed name: %v", cfg.Name)
   305  	}
   306  	if cfg.Workdir == "" {
   307  		return fmt.Errorf("workdir is empty")
   308  	}
   309  	cfg.Workdir = osutil.Abs(cfg.Workdir)
   310  	err := osutil.MkdirAll(cfg.Workdir)
   311  	if err != nil {
   312  		return err
   313  	}
   314  	if cfg.Corpus != "" && !osutil.IsExist(cfg.Corpus) {
   315  		return fmt.Errorf("corpus %v does not exist", cfg.Corpus)
   316  	}
   317  	if cfg.MaxInstances < 1 {
   318  		return fmt.Errorf("max_instances cannot be less than 1")
   319  	}
   320  	if cfg.BenchCmp != "" && !osutil.IsExist(cfg.BenchCmp) {
   321  		return fmt.Errorf("benchmp path is specified, but %s does not exist", cfg.BenchCmp)
   322  	}
   323  	if _, ok := targetConstructors[cfg.Target]; !ok {
   324  		return fmt.Errorf("unknown target %s", cfg.Target)
   325  	}
   326  	if err = checkReproTestConfig(&cfg.ReproConfig); err != nil {
   327  		return err
   328  	}
   329  	cfg.Corpus = osutil.Abs(cfg.Corpus)
   330  	names := make(map[string]bool)
   331  	for idx := range cfg.Checkouts {
   332  		co := &cfg.Checkouts[idx]
   333  		if !vcs.CheckRepoAddress(co.Repo) {
   334  			return fmt.Errorf("invalid repo: %s", co.Repo)
   335  		}
   336  		if co.Branch == "" {
   337  			co.Branch = "master"
   338  		} else if !vcs.CheckBranch(co.Branch) {
   339  			return fmt.Errorf("invalid branch: %s", co.Branch)
   340  		}
   341  		if names[co.Name] {
   342  			return fmt.Errorf("duplicate checkout name: %v", co.Name)
   343  		}
   344  		names[co.Name] = true
   345  	}
   346  	return nil
   347  }