github.com/google/syzkaller@v0.0.0-20240517125934-c0f1611a36d6/pkg/vcs/linux.go (about)

     1  // Copyright 2019 syzkaller project authors. All rights reserved.
     2  // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
     3  
     4  package vcs
     5  
     6  import (
     7  	"bytes"
     8  	"errors"
     9  	"fmt"
    10  	"net/mail"
    11  	"path/filepath"
    12  	"regexp"
    13  	"sort"
    14  	"strings"
    15  	"time"
    16  
    17  	"github.com/google/syzkaller/pkg/debugtracer"
    18  	"github.com/google/syzkaller/pkg/kconfig"
    19  	"github.com/google/syzkaller/pkg/osutil"
    20  	"github.com/google/syzkaller/pkg/report/crash"
    21  	"github.com/google/syzkaller/prog"
    22  	"github.com/google/syzkaller/sys/targets"
    23  )
    24  
// linux is the Linux-kernel flavour of a repository. It embeds *git and
// additionally remembers the VM type it was created for.
type linux struct {
	*git
	// vmType is the VM type given to newLinux; PrepareBisect skips the
	// upstream fetch when it equals "gvisor".
	vmType string
}
    29  
// Compile-time assertions that *linux satisfies the Bisecter and
// ConfigMinimizer interfaces.
var (
	_ Bisecter        = new(linux)
	_ ConfigMinimizer = new(linux)
)
    34  
    35  func newLinux(dir string, opts []RepoOpt, vmType string) *linux {
    36  	ignoreCC := map[string]bool{
    37  		"stable@vger.kernel.org": true,
    38  	}
    39  
    40  	return &linux{
    41  		git:    newGit(dir, ignoreCC, opts),
    42  		vmType: vmType,
    43  	}
    44  }
    45  
    46  func (ctx *linux) PreviousReleaseTags(commit, compilerType string) ([]string, error) {
    47  	tags, err := ctx.git.previousReleaseTags(commit, false, false, false)
    48  	if err != nil {
    49  		return nil, err
    50  	}
    51  
    52  	cutoff := ""
    53  	if compilerType == "gcc" {
    54  		// Initially we tried to stop at 3.8 because:
    55  		// v3.8 does not work with modern perl, and as we go further in history
    56  		// make stops to work, then binutils, glibc, etc. So we stop at v3.8.
    57  		// Up to that point we only need an ancient gcc.
    58  		//
    59  		// But kernels don't boot starting from 4.0 and back.
    60  		// That was fixed by 99124e4db5b7b70daeaaf1d88a6a8078a0004c6e,
    61  		// and it can be cherry-picked into 3.14..4.0 but it conflicts for 3.13 and older.
    62  		//
    63  		// But starting from 4.0 our user-space binaries start crashing with
    64  		// assorted errors which suggests process memory corruption by kernel.
    65  		//
    66  		// We used to use 4.1 as the oldest tested release (it works in general).
    67  		// However, there is correlation between how far back we go and probability
    68  		// of getting correct result (see #1532). So we then stopped at 4.6.
    69  		// 4.6 is somewhat arbitrary, we've seen lots of wrong results in 4.5..4.6 range,
    70  		// but there is definitive reason for 4.6. Most likely later we want to bump it
    71  		// even more (as new releases are produced). Next good candidate may be 4.11
    72  		// because then we won't need gcc 5.5.
    73  		//
    74  		// TODO: The buildroot images deployed after #2820 can only boot v4.19+ kernels.
    75  		// This has caused lots of bad bisection results, see #3224. We either need a new
    76  		// universal image or a kernel version dependant image selection.
    77  		cutoff = "v4.18"
    78  	} else if compilerType == "clang" {
    79  		// v5.3 was the first release with solid clang support, however I was able to
    80  		// compile v5.1..v5.3 using a newer defconfig + make oldconfig. Everything older
    81  		// would require further cherry-picks.
    82  		cutoff = "v5.2"
    83  	}
    84  
    85  	for i, tag := range tags {
    86  		if tag == cutoff {
    87  			tags = tags[:i]
    88  			break
    89  		}
    90  	}
    91  	return tags, nil
    92  }
    93  
    94  func gitParseReleaseTags(output []byte, includeRC bool) []string {
    95  	var tags []string
    96  	for _, tag := range bytes.Split(output, []byte{'\n'}) {
    97  		if gitReleaseTagToInt(string(tag), includeRC) != 0 {
    98  			tags = append(tags, string(tag))
    99  		}
   100  	}
   101  	sort.Slice(tags, func(i, j int) bool {
   102  		return gitReleaseTagToInt(tags[i], includeRC) > gitReleaseTagToInt(tags[j], includeRC)
   103  	})
   104  	return tags
   105  }
   106  
   107  func gitReleaseTagToInt(tag string, includeRC bool) uint64 {
   108  	v1, v2, rc, v3 := ParseReleaseTag(tag)
   109  	if v1 < 0 {
   110  		return 0
   111  	}
   112  	if v3 < 0 {
   113  		v3 = 0
   114  	}
   115  	if rc >= 0 {
   116  		if !includeRC {
   117  			return 0
   118  		}
   119  	} else {
   120  		rc = 999
   121  	}
   122  	return uint64(v1)*1e9 + uint64(v2)*1e6 + uint64(rc)*1e3 + uint64(v3)
   123  }
   124  
   125  func (ctx *linux) EnvForCommit(
   126  	defaultCompiler, compilerType, binDir, commit string, kernelConfig []byte,
   127  	backports []BackportCommit,
   128  ) (*BisectEnv, error) {
   129  	tagList, err := ctx.previousReleaseTags(commit, true, false, false)
   130  	if err != nil {
   131  		return nil, err
   132  	}
   133  	tags := make(map[string]bool)
   134  	for _, tag := range tagList {
   135  		tags[tag] = true
   136  	}
   137  	cf, err := kconfig.ParseConfigData(kernelConfig, "config")
   138  	if err != nil {
   139  		return nil, err
   140  	}
   141  	setLinuxTagConfigs(cf, tags)
   142  
   143  	compiler := ""
   144  	if compilerType == "gcc" {
   145  		compiler = linuxGCCPath(tags, binDir, defaultCompiler)
   146  	} else if compilerType == "clang" {
   147  		compiler = linuxClangPath(tags, binDir, defaultCompiler)
   148  	} else {
   149  		return nil, fmt.Errorf("unsupported bisect compiler: %v", compilerType)
   150  	}
   151  
   152  	env := &BisectEnv{
   153  		Compiler:     compiler,
   154  		KernelConfig: cf.Serialize(),
   155  	}
   156  	err = linuxFixBackports(ctx.git, backports...)
   157  	if err != nil {
   158  		return nil, fmt.Errorf("failed to cherry pick fixes: %w", err)
   159  	}
   160  	return env, nil
   161  }
   162  
   163  func linuxClangPath(tags map[string]bool, binDir, defaultCompiler string) string {
   164  	version := ""
   165  	switch {
   166  	case tags["v5.9"]:
   167  		// Verified to work with 14.0.6.
   168  		return defaultCompiler
   169  	default:
   170  		// everything before v5.3 might not work great
   171  		// everything before v5.1 does not work
   172  		version = "9.0.1"
   173  	}
   174  	return filepath.Join(binDir, "llvm-"+version, "bin", "clang")
   175  }
   176  
   177  func linuxGCCPath(tags map[string]bool, binDir, defaultCompiler string) string {
   178  	version := ""
   179  	switch {
   180  	case tags["v5.16"]:
   181  		// Verified to work with 15.0.7.
   182  		return defaultCompiler
   183  	case tags["v5.9"]:
   184  		version = "10.1.0"
   185  	case tags["v4.12"]:
   186  		version = "8.1.0"
   187  	case tags["v4.11"]:
   188  		version = "7.3.0"
   189  	default:
   190  		version = "5.5.0"
   191  	}
   192  	return filepath.Join(binDir, "gcc-"+version, "bin", "gcc")
   193  }
   194  
   195  func (ctx *linux) PrepareBisect() error {
   196  	if ctx.vmType != "gvisor" {
   197  		// Some linux repos we fuzz don't import the upstream release git tags. We need tags
   198  		// to decide which compiler versions to use. Let's fetch upstream for its tags.
   199  		err := ctx.git.fetchRemote("https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git", "")
   200  		if err != nil {
   201  			return fmt.Errorf("fetching upstream linux failed: %w", err)
   202  		}
   203  	}
   204  	return nil
   205  }
   206  
   207  func (ctx *linux) Bisect(bad, good string, dt debugtracer.DebugTracer, pred func() (BisectResult,
   208  	error)) ([]*Commit, error) {
   209  	commits, err := ctx.git.Bisect(bad, good, dt, pred)
   210  	if len(commits) == 1 {
   211  		ctx.addMaintainers(commits[0])
   212  	}
   213  	return commits, err
   214  }
   215  
   216  func (ctx *linux) addMaintainers(com *Commit) {
   217  	if len(com.Recipients) > 2 {
   218  		return
   219  	}
   220  	mtrs := ctx.getMaintainers(com.Hash, false)
   221  	if len(mtrs) < 3 {
   222  		mtrs = ctx.getMaintainers(com.Hash, true)
   223  	}
   224  	com.Recipients = append(com.Recipients, mtrs...)
   225  	sort.Sort(com.Recipients)
   226  }
   227  
   228  func (ctx *linux) getMaintainers(hash string, blame bool) Recipients {
   229  	// See #1441 re --git-min-percent.
   230  	args := "git show " + hash + " | " +
   231  		filepath.FromSlash("scripts/get_maintainer.pl") +
   232  		" --git-min-percent=20"
   233  	if blame {
   234  		args += " --git-blame"
   235  	}
   236  	output, err := osutil.RunCmd(time.Minute, ctx.git.dir, "bash", "-c", args)
   237  	if err != nil {
   238  		return nil
   239  	}
   240  	return ParseMaintainersLinux(output)
   241  }
   242  
   243  func ParseMaintainersLinux(text []byte) Recipients {
   244  	lines := strings.Split(string(text), "\n")
   245  	reRole := regexp.MustCompile(` \([^)]+\)$`)
   246  	var mtrs Recipients
   247  	// LMKL is To by default, but it changes to Cc if there's also a subsystem list.
   248  	lkmlType := To
   249  	foundLkml := false
   250  	for _, line := range lines {
   251  		role := reRole.FindString(line)
   252  		address := strings.Replace(line, role, "", 1)
   253  		addr, err := mail.ParseAddress(address)
   254  		if err != nil {
   255  			continue
   256  		}
   257  		var roleType RecipientType
   258  		if addr.Address == "linux-kernel@vger.kernel.org" {
   259  			foundLkml = true
   260  			continue
   261  		} else if strings.Contains(role, "list") {
   262  			lkmlType = Cc
   263  			roleType = To
   264  		} else if strings.Contains(role, "maintainer") || strings.Contains(role, "supporter") {
   265  			roleType = To
   266  		} else {
   267  			roleType = Cc // Reviewer or other role; default to Cc.
   268  		}
   269  		mtrs = append(mtrs, RecipientInfo{*addr, roleType})
   270  	}
   271  	if foundLkml {
   272  		mtrs = append(mtrs, RecipientInfo{mail.Address{Address: "linux-kernel@vger.kernel.org"}, lkmlType})
   273  	}
   274  	sort.Sort(mtrs)
   275  	return mtrs
   276  }
   277  
// ErrBadKconfig is the sentinel error that Minimize wraps when parsing the
// repository's Kconfig file or the baseline config fails.
var ErrBadKconfig = errors.New("failed to parse Kconfig")
   279  
// configBisectTag is the marker that serialize writes at the very start of a
// config; Minimize treats a config beginning with it as already minimized.
const configBisectTag = "# Minimized by syzkaller"
   281  
   282  // Minimize() attempts to drop Linux kernel configs that are unnecessary(*) for bug reproduction.
   283  // 1. Remove sanitizers that are not needed to trigger the target class of bugs.
   284  // 2. Disable unrelated kernel subsystems. This is done by bisecting config changes between
   285  // `original` and `baseline`.
   286  // (*) After an unnecessary config is deleted, we still have pred() == BisectBad.
   287  func (ctx *linux) Minimize(target *targets.Target, original, baseline []byte, types []crash.Type,
   288  	dt debugtracer.DebugTracer, pred func(test []byte) (BisectResult, error)) ([]byte, error) {
   289  	if bytes.HasPrefix(original, []byte(configBisectTag)) {
   290  		dt.Log("# configuration already minimized\n")
   291  		return original, nil
   292  	}
   293  	kconf, err := kconfig.Parse(target, filepath.Join(ctx.git.dir, "Kconfig"))
   294  	if err != nil {
   295  		return nil, fmt.Errorf("%w: %w", ErrBadKconfig, err)
   296  	}
   297  	config, err := kconfig.ParseConfigData(original, "original")
   298  	if err != nil {
   299  		return nil, err
   300  	}
   301  	minimizeCtx := &minimizeLinuxCtx{
   302  		kconf:  kconf,
   303  		config: config,
   304  		pred: func(cfg *kconfig.ConfigFile) (bool, error) {
   305  			res, err := pred(serialize(cfg))
   306  			return res == BisectBad, err
   307  		},
   308  		transform: func(cfg *kconfig.ConfigFile) {
   309  			setLinuxTagConfigs(cfg, nil)
   310  		},
   311  		DebugTracer: dt,
   312  	}
   313  	if len(types) > 0 {
   314  		// Technically, as almost all sanitizers are Yes/No config options, we could have
   315  		// achieved this minimization simply by disabling them all in the baseline config.
   316  		// However, we are now trying to make the most out of the few config minimization
   317  		// iterations we're ready to make do during the bisection process.
   318  		// Since it's possible to quite reliably determine the needed and unneeded sanitizers
   319  		// just by looking at crash reports, let's prefer a more complicated logic over worse
   320  		// bisection results.
   321  		// Once we start doing proper config minimizations for every reproducer, we can delete
   322  		// most of the related code.
   323  		err := minimizeCtx.dropInstrumentation(types)
   324  		if err != nil {
   325  			return nil, err
   326  		}
   327  	}
   328  	if len(baseline) > 0 {
   329  		baselineConfig, err := kconfig.ParseConfigData(baseline, "baseline")
   330  		// If we fail to parse the baseline config proceed with original one as baseline config
   331  		// is an optional parameter.
   332  		if err != nil {
   333  			return nil, fmt.Errorf("%w: %w", ErrBadKconfig, err)
   334  		}
   335  		err = minimizeCtx.minimizeAgainst(baselineConfig)
   336  		if err != nil {
   337  			return nil, err
   338  		}
   339  	}
   340  	return minimizeCtx.getConfig(), nil
   341  }
   342  
   343  func serialize(cf *kconfig.ConfigFile) []byte {
   344  	return []byte(fmt.Sprintf("%v, rev: %v\n%s", configBisectTag, prog.GitRevision, cf.Serialize()))
   345  }
   346  
// minimizeLinuxCtx holds the state of a single Minimize invocation.
type minimizeLinuxCtx struct {
	// kconf is the parsed Kconfig whose Minimize method minimizeAgainst calls.
	kconf     *kconfig.KConfig
	// config is the current config; it is replaced whenever a reduced config is accepted.
	config    *kconfig.ConfigFile
	// pred is invoked by runPred on a transformed copy of a candidate config;
	// Minimize sets it up to return true when the result is BisectBad.
	pred      func(*kconfig.ConfigFile) (bool, error)
	// transform is applied to a config before it is tested or returned.
	transform func(*kconfig.ConfigFile)
	// DebugTracer is embedded for logging and saving intermediate files.
	debugtracer.DebugTracer
}
   354  
   355  func (ctx *minimizeLinuxCtx) minimizeAgainst(base *kconfig.ConfigFile) error {
   356  	base = base.Clone()
   357  	ctx.transform(base)
   358  	// Don't do too many minimization runs, it will make bug bisections too long.
   359  	// The purpose is only to reduce the number of build/boot/test errors due to bugs
   360  	// in unrelated parts of the kernel.
   361  	// Bisection is not getting much faster with smaller configs, only more reliable,
   362  	// so there's a trade-off. Try to do best in 5 iterations, that's about 1.5 hours.
   363  	const minimizeRuns = 5
   364  	minConfig, err := ctx.kconf.Minimize(base, ctx.config, ctx.runPred, minimizeRuns, ctx)
   365  	if err != nil {
   366  		return err
   367  	}
   368  	ctx.config = minConfig
   369  	return nil
   370  }
   371  
   372  func (ctx *minimizeLinuxCtx) dropInstrumentation(types []crash.Type) error {
   373  	ctx.Log("check whether we can drop unnecessary instrumentation")
   374  	oldTransform := ctx.transform
   375  	transform := func(c *kconfig.ConfigFile) {
   376  		oldTransform(c)
   377  		setLinuxSanitizerConfigs(c, types, ctx)
   378  	}
   379  	newConfig := ctx.config.Clone()
   380  	transform(newConfig)
   381  	if bytes.Equal(ctx.config.Serialize(), newConfig.Serialize()) {
   382  		ctx.Log("there was nothing we could disable; skip")
   383  		return nil
   384  	}
   385  	ctx.SaveFile("no-instrumentation.config", newConfig.Serialize())
   386  	ok, err := ctx.runPred(newConfig)
   387  	if err != nil {
   388  		return err
   389  	}
   390  	if ok {
   391  		ctx.Log("the bug reproduces without the instrumentation")
   392  		ctx.transform = transform
   393  		ctx.config = newConfig
   394  	}
   395  	return nil
   396  }
   397  
   398  func (ctx *minimizeLinuxCtx) runPred(cfg *kconfig.ConfigFile) (bool, error) {
   399  	cfg = cfg.Clone()
   400  	ctx.transform(cfg)
   401  	return ctx.pred(cfg)
   402  }
   403  
   404  func (ctx *minimizeLinuxCtx) getConfig() []byte {
   405  	ctx.transform(ctx.config)
   406  	return serialize(ctx.config)
   407  }