github.com/google/syzkaller@v0.0.0-20240517125934-c0f1611a36d6/tools/syz-testbed/testbed.go

// Copyright 2021 syzkaller project authors. All rights reserved.
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.

// syz-testbed automatically checks out, builds and sets up a number of syzkaller instances.
// This can be very helpful, e.g. when gauging the effect of new changes on overall syzkaller
// performance.
// For details see docs/syz_testbed.md.
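//
// Usage (the config file name here is illustrative):
//
//	syz-testbed -config testbed.json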

package main

import (
	"encoding/json"
	"flag"
	"fmt"
	"log"
	"path/filepath"
	"regexp"
	"sync"
	"time"

	"github.com/google/syzkaller/pkg/config"
	"github.com/google/syzkaller/pkg/osutil"
	"github.com/google/syzkaller/pkg/tool"
	"github.com/google/syzkaller/pkg/vcs"
)

var (
	flagConfig = flag.String("config", "", "config file")
)

type TestbedConfig struct {
	Name          string           `json:"name"`           // name of the testbed
	Target        string           `json:"target"`         // what application to test
	MaxInstances  int              `json:"max_instances"`  // max # of simultaneously running instances
	RunTime       DurationConfig   `json:"run_time"`       // lifetime of an instance (default "24h")
	HTTP          string           `json:"http"`           // on which port to set up a simple web dashboard
	BenchCmp      string           `json:"benchcmp"`       // path to the syz-benchcmp executable
	Corpus        string           `json:"corpus"`         // path to the corpus file
	Workdir       string           `json:"workdir"`        // instances will be checked out here
	ReproConfig   ReproTestConfig  `json:"repro_config"`   // syz-repro benchmarking config
	ManagerConfig json.RawMessage  `json:"manager_config"` // base manager config
	Checkouts     []CheckoutConfig `json:"checkouts"`
}
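
// An illustrative config sketch (paths, repos and branch names are made up;
// see checkConfig below for the actual validation rules):
//
//	{
//		"name": "testbed",
//		"target": "syz-manager",
//		"max_instances": 2,
//		"run_time": "24h",
//		"workdir": "/testbed/workdir",
//		"manager_config": {"target": "linux/amd64"},
//		"checkouts": [
//			{"name": "base", "repo": "https://github.com/google/syzkaller.git"},
//			{"name": "patched", "repo": "https://github.com/google/syzkaller.git", "branch": "test-branch"}
//		]
//	}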
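// DurationConfig wraps time.Duration so that durations can be written in the
// JSON config as strings like "24h" (see UnmarshalJSON/MarshalJSON below).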
type DurationConfig struct {
	time.Duration
}

type CheckoutConfig struct {
	Name          string          `json:"name"`
	Repo          string          `json:"repo"`
	Branch        string          `json:"branch"`
	ManagerConfig json.RawMessage `json:"manager_config"` // a patch to the base manager config
}

type ReproTestConfig struct {
	InputLogs     string   `json:"input_logs"`      // take crash logs from a folder
	InputWorkdir  string   `json:"input_workdir"`   // take crash logs from a syzkaller workdir
	CrashesPerBug int      `json:"crashes_per_bug"` // how many crashes must be taken from each bug
	SkipBugs      []string `json:"skip_bugs"`       // crashes to exclude from the workdir, list of regexps
}
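
// An illustrative repro_config sketch (values are made up): take up to two
// crash logs per bug from an existing workdir, skipping bugs that match a regexp:
//
//	{
//		"input_workdir": "/testbed/old-workdir",
//		"crashes_per_bug": 2,
//		"skip_bugs": ["no output", "lost connection"]
//	}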

type TestbedContext struct {
	Config         *TestbedConfig
	Checkouts      []*Checkout
	NextCheckoutID int
	NextInstanceID int
	Target         TestbedTarget
	mu             sync.Mutex
}

func main() {
	flag.Parse()
	cfg := &TestbedConfig{
		Name:    "testbed",
		Target:  "syz-manager",
		RunTime: DurationConfig{24 * time.Hour},
		ReproConfig: ReproTestConfig{
			CrashesPerBug: 1,
		},
	}
	err := config.LoadFile(*flagConfig, &cfg)
	if err != nil {
		tool.Failf("failed to read config: %s", err)
	}

	err = checkConfig(cfg)
	if err != nil {
		tool.Failf("invalid config: %s", err)
	}
	ctx := TestbedContext{
		Config: cfg,
		Target: targetConstructors[cfg.Target](cfg),
	}
	go ctx.setupHTTPServer()

	for _, checkoutCfg := range cfg.Checkouts {
		mgrCfg := ctx.MakeMgrConfig(cfg.ManagerConfig, checkoutCfg.ManagerConfig)
		co, err := ctx.NewCheckout(&checkoutCfg, mgrCfg)
		if err != nil {
			tool.Failf("checkout failed: %s", err)
		}
		ctx.Checkouts = append(ctx.Checkouts, co)
	}

	shutdown := make(chan struct{})
	osutil.HandleInterrupts(shutdown)

	go func() {
		const period = 90 * time.Second
		for {
			time.Sleep(period)
			err := ctx.SaveStats()
			if err != nil {
				log.Printf("stats saving error: %s", err)
			}
		}
	}()

	ctx.Loop(shutdown)
}

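// MakeMgrConfig overlays a per-checkout patch on top of the base manager
// config. A sketch of the merge semantics (field values are made up):
//
//	base:   {"procs": 8, "target": "linux/amd64"}
//	patch:  {"procs": 4}
//	result: {"procs": 4, "target": "linux/amd64"}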
func (ctx *TestbedContext) MakeMgrConfig(base, patch json.RawMessage) json.RawMessage {
	mgrCfg, err := config.MergeJSONs(base, patch)
	if err != nil {
		tool.Failf("failed to apply a patch to the base manager config: %s", err)
	}
	// We don't care much about the specific ports of syz-managers.
	mgrCfg, err = config.PatchJSON(mgrCfg, map[string]interface{}{"HTTP": ":0"})
	if err != nil {
		tool.Failf("failed to assign empty HTTP value: %s", err)
	}
	return mgrCfg
}
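// GetStatViews returns two views of the collected run results: "completed"
// contains only finished runs, while "all" also includes the running ones.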
func (ctx *TestbedContext) GetStatViews() ([]StatView, error) {
	groupsCompleted := []RunResultGroup{}
	groupsAll := []RunResultGroup{}
	for _, checkout := range ctx.Checkouts {
		running := checkout.GetRunningResults()
		completed := checkout.GetCompletedResults()
		groupsCompleted = append(groupsCompleted, RunResultGroup{
			Name:    checkout.Name,
			Results: completed,
		})
		groupsAll = append(groupsAll, RunResultGroup{
			Name:    checkout.Name,
			Results: append(completed, running...),
		})
	}
	return []StatView{
		{
			Name:   "completed",
			Groups: groupsCompleted,
		},
		{
			Name:   "all",
			Groups: groupsAll,
		},
	}, nil
}
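// TestbedStatsTable summarizes per-checkout progress: how many instances are
// running, how many have completed, and how long ago the last one started.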
func (ctx *TestbedContext) TestbedStatsTable() *Table {
	table := NewTable("Checkout", "Running", "Completed", "Last started")
	for _, checkout := range ctx.Checkouts {
		checkout.mu.Lock()
		last := ""
		if !checkout.LastRunning.IsZero() {
			last = time.Since(checkout.LastRunning).Round(time.Second).String()
		}
		table.AddRow(checkout.Name,
			fmt.Sprintf("%d", len(checkout.Running)),
			fmt.Sprintf("%d", len(checkout.Completed)),
			last,
		)
		checkout.mu.Unlock()
	}
	return table
}
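// SaveStats dumps the current stat views and the testbed summary table to
// the workdir; it is invoked periodically from main().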
func (ctx *TestbedContext) SaveStats() error {
	// Prevent concurrent saving of the stats.
	ctx.mu.Lock()
	defer ctx.mu.Unlock()
	views, err := ctx.GetStatViews()
	if err != nil {
		return err
	}
	for _, view := range views {
		dir := filepath.Join(ctx.Config.Workdir, "stats_"+view.Name)
		err := ctx.Target.SaveStatView(view, dir)
		if err != nil {
			return err
		}
	}
	table := ctx.TestbedStatsTable()
	return table.SaveAsCsv(filepath.Join(ctx.Config.Workdir, "testbed.csv"))
}
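// Slot is a single execution slot: it creates, runs and archives instances
// one after another until it is told to stop or an instance fails.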
func (ctx *TestbedContext) Slot(slotID int, stop chan struct{}, ret chan error) {
	// It seems that even gracefully finished syz-managers can leak GCE instances.
	// To account for that strange behavior, let's reuse syz-manager names in each slot,
	// so that their VMs will in turn reuse the names of the leaked ones.
	slotName := fmt.Sprintf("%s-%d", ctx.Config.Name, slotID)
	for {
		checkout, instance, err := ctx.Target.NewJob(slotName, ctx.Checkouts)
		if err != nil {
			ret <- fmt.Errorf("failed to create instance: %w", err)
			return
		}
		checkout.AddRunning(instance)
		retChannel := make(chan error)
		go func() {
			retChannel <- instance.Run()
		}()

		var retErr error
		select {
		case <-stop:
			instance.Stop()
			<-retChannel
			retErr = fmt.Errorf("instance was killed")
		case retErr = <-retChannel:
		}

		// For now, we only archive instances that finished normally (retErr == nil).
		// syz-testbed will stop after such an error anyway, so it's not a problem
		// that they remain in Running.
		if retErr != nil {
			ret <- retErr
			return
		}
		err = checkout.ArchiveInstance(instance)
		if err != nil {
			ret <- fmt.Errorf("a call to ArchiveInstance failed: %w", err)
			return
		}
	}
}

// Loop creates instances, runs them, stops them, archives them, and so on,
// until the experiment is stopped or some instance fails.
func (ctx *TestbedContext) Loop(stop chan struct{}) {
	stopAll := make(chan struct{})
	errors := make(chan error)
	for i := 0; i < ctx.Config.MaxInstances; i++ {
		go ctx.Slot(i, stopAll, errors)
	}

	exited := 0
	select {
	case <-stop:
		log.Printf("stopping the experiment")
	case err := <-errors:
		exited = 1
		log.Printf("an instance has failed (%s), stopping everything", err)
	}
	close(stopAll)
	for ; exited < ctx.Config.MaxInstances; exited++ {
		<-errors
	}
}
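// UnmarshalJSON parses JSON strings like "24h" or "90m" into the wrapped
// time.Duration via time.ParseDuration.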
func (d *DurationConfig) UnmarshalJSON(data []byte) error {
	var v interface{}
	if err := json.Unmarshal(data, &v); err != nil {
		return err
	}
	str, ok := v.(string)
	if !ok {
		return fmt.Errorf("%s was expected to be a string", data)
	}
	parsed, err := time.ParseDuration(str)
	if err == nil {
		d.Duration = parsed
	}
	return err
}

func (d *DurationConfig) MarshalJSON() ([]byte, error) {
	return json.Marshal(d.String())
}

func checkReproTestConfig(cfg *ReproTestConfig) error {
	if cfg.InputLogs != "" && !osutil.IsExist(cfg.InputLogs) {
		return fmt.Errorf("input_logs folder does not exist: %v", cfg.InputLogs)
	}
	if cfg.InputWorkdir != "" && !osutil.IsExist(cfg.InputWorkdir) {
		return fmt.Errorf("input_workdir folder does not exist: %v", cfg.InputWorkdir)
	}
	if cfg.CrashesPerBug < 1 {
		return fmt.Errorf("crashes_per_bug cannot be less than 1: %d", cfg.CrashesPerBug)
	}
	return nil
}

func checkConfig(cfg *TestbedConfig) error {
	testbedNameRe := regexp.MustCompile(`^[0-9a-z\-]{1,20}$`)
	if !testbedNameRe.MatchString(cfg.Name) {
		return fmt.Errorf("invalid testbed name: %v", cfg.Name)
	}
	if cfg.Workdir == "" {
		return fmt.Errorf("workdir is empty")
	}
	cfg.Workdir = osutil.Abs(cfg.Workdir)
	err := osutil.MkdirAll(cfg.Workdir)
	if err != nil {
		return err
	}
	if cfg.Corpus != "" && !osutil.IsExist(cfg.Corpus) {
		return fmt.Errorf("corpus %v does not exist", cfg.Corpus)
	}
	if cfg.MaxInstances < 1 {
		return fmt.Errorf("max_instances cannot be less than 1")
	}
	if cfg.BenchCmp != "" && !osutil.IsExist(cfg.BenchCmp) {
		return fmt.Errorf("benchcmp path is specified, but %s does not exist", cfg.BenchCmp)
	}
	if _, ok := targetConstructors[cfg.Target]; !ok {
		return fmt.Errorf("unknown target %s", cfg.Target)
	}
	if err = checkReproTestConfig(&cfg.ReproConfig); err != nil {
		return err
	}
	cfg.Corpus = osutil.Abs(cfg.Corpus)
	names := make(map[string]bool)
	for idx := range cfg.Checkouts {
		co := &cfg.Checkouts[idx]
		if !vcs.CheckRepoAddress(co.Repo) {
			return fmt.Errorf("invalid repo: %s", co.Repo)
		}
		if co.Branch == "" {
			co.Branch = "master"
		} else if !vcs.CheckBranch(co.Branch) {
			return fmt.Errorf("invalid branch: %s", co.Branch)
		}
		if names[co.Name] {
			return fmt.Errorf("duplicate checkout name: %v", co.Name)
		}
		names[co.Name] = true
	}
	return nil
}