github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/pkg/vcs/linux.go (about)

     1  // Copyright 2019 syzkaller project authors. All rights reserved.
     2  // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
     3  
     4  package vcs
     5  
     6  import (
     7  	"bytes"
     8  	"errors"
     9  	"fmt"
    10  	"net/mail"
    11  	"path/filepath"
    12  	"regexp"
    13  	"sort"
    14  	"strings"
    15  	"time"
    16  
    17  	"github.com/google/syzkaller/pkg/debugtracer"
    18  	"github.com/google/syzkaller/pkg/kconfig"
    19  	"github.com/google/syzkaller/pkg/osutil"
    20  	"github.com/google/syzkaller/pkg/report/crash"
    21  	"github.com/google/syzkaller/prog"
    22  	"github.com/google/syzkaller/sys/targets"
    23  )
    24  
    25  type linux struct {
    26  	*gitRepo
    27  	vmType string
    28  }
    29  
    30  var (
    31  	_ Bisecter        = new(linux)
    32  	_ ConfigMinimizer = new(linux)
    33  )
    34  
    35  func newLinux(dir string, opts []RepoOpt, vmType string) *linux {
    36  	ignoreCC := map[string]bool{
    37  		"stable@vger.kernel.org": true,
    38  	}
    39  
    40  	return &linux{
    41  		gitRepo: newGitRepo(dir, ignoreCC, opts),
    42  		vmType:  vmType,
    43  	}
    44  }
    45  
    46  func (ctx *linux) PreviousReleaseTags(commit, compilerType string) ([]string, error) {
    47  	tags, err := ctx.gitRepo.previousReleaseTags(commit, false, false, false)
    48  	if err != nil {
    49  		return nil, err
    50  	}
    51  
    52  	cutoff := ""
    53  	switch compilerType {
    54  	case "gcc":
    55  		// Initially we tried to stop at 3.8 because:
    56  		// v3.8 does not work with modern perl, and as we go further in history
    57  		// make stops to work, then binutils, glibc, etc. So we stop at v3.8.
    58  		// Up to that point we only need an ancient gcc.
    59  		//
    60  		// But kernels don't boot starting from 4.0 and back.
    61  		// That was fixed by 99124e4db5b7b70daeaaf1d88a6a8078a0004c6e,
    62  		// and it can be cherry-picked into 3.14..4.0 but it conflicts for 3.13 and older.
    63  		//
    64  		// But starting from 4.0 our user-space binaries start crashing with
    65  		// assorted errors which suggests process memory corruption by kernel.
    66  		//
    67  		// We used to use 4.1 as the oldest tested release (it works in general).
    68  		// However, there is correlation between how far back we go and probability
    69  		// of getting correct result (see #1532). So we then stopped at 4.6.
    70  		// 4.6 is somewhat arbitrary, we've seen lots of wrong results in 4.5..4.6 range,
    71  		// but there is definitive reason for 4.6. Most likely later we want to bump it
    72  		// even more (as new releases are produced). Next good candidate may be 4.11
    73  		// because then we won't need gcc 5.5.
    74  		//
    75  		// TODO: The buildroot images deployed after #2820 can only boot v4.19+ kernels.
    76  		// This has caused lots of bad bisection results, see #3224. We either need a new
    77  		// universal image or a kernel version dependant image selection.
    78  		cutoff = "v4.18"
    79  	case "clang":
    80  		// v5.3 was the first release with solid clang support, however I was able to
    81  		// compile v5.1..v5.3 using a newer defconfig + make oldconfig. Everything older
    82  		// would require further cherry-picks.
    83  		cutoff = "v5.2"
    84  	}
    85  
    86  	for i, tag := range tags {
    87  		if tag == cutoff {
    88  			tags = tags[:i]
    89  			break
    90  		}
    91  	}
    92  	return tags, nil
    93  }
    94  
    95  func gitParseReleaseTags(output []byte, includeRC bool) []string {
    96  	var tags []string
    97  	for _, tag := range bytes.Split(output, []byte{'\n'}) {
    98  		if gitReleaseTagToInt(string(tag), includeRC) != 0 {
    99  			tags = append(tags, string(tag))
   100  		}
   101  	}
   102  	sort.Slice(tags, func(i, j int) bool {
   103  		return gitReleaseTagToInt(tags[i], includeRC) > gitReleaseTagToInt(tags[j], includeRC)
   104  	})
   105  	return tags
   106  }
   107  
   108  func gitReleaseTagToInt(tag string, includeRC bool) uint64 {
   109  	v1, v2, rc, v3 := ParseReleaseTag(tag)
   110  	if v1 < 0 {
   111  		return 0
   112  	}
   113  	v3 = max(v3, 0)
   114  	if rc >= 0 {
   115  		if !includeRC {
   116  			return 0
   117  		}
   118  	} else {
   119  		rc = 999
   120  	}
   121  	return uint64(v1)*1e9 + uint64(v2)*1e6 + uint64(rc)*1e3 + uint64(v3)
   122  }
   123  
   124  func (ctx *linux) EnvForCommit(
   125  	defaultCompiler, compilerType, binDir, commit string, kernelConfig []byte,
   126  	backports []BackportCommit,
   127  ) (*BisectEnv, error) {
   128  	tagList, err := ctx.previousReleaseTags(commit, true, false, false)
   129  	if err != nil {
   130  		return nil, err
   131  	}
   132  	tags := make(map[string]bool)
   133  	for _, tag := range tagList {
   134  		tags[tag] = true
   135  	}
   136  	cf, err := kconfig.ParseConfigData(kernelConfig, "config")
   137  	if err != nil {
   138  		return nil, err
   139  	}
   140  	setLinuxTagConfigs(cf, tags)
   141  
   142  	compiler := ""
   143  	switch compilerType {
   144  	case "gcc":
   145  		compiler = linuxGCCPath(tags, binDir, defaultCompiler)
   146  	case "clang":
   147  		compiler = linuxClangPath(tags, binDir, defaultCompiler)
   148  	default:
   149  		return nil, fmt.Errorf("unsupported bisect compiler: %v", compilerType)
   150  	}
   151  
   152  	env := &BisectEnv{
   153  		Compiler:     compiler,
   154  		KernelConfig: cf.Serialize(),
   155  	}
   156  	err = linuxFixBackports(ctx.gitRepo, backports...)
   157  	if err != nil {
   158  		return nil, fmt.Errorf("failed to cherry pick fixes: %w", err)
   159  	}
   160  	return env, nil
   161  }
   162  
   163  func linuxClangPath(tags map[string]bool, binDir, defaultCompiler string) string {
   164  	version := ""
   165  	// The defaultCompiler and clang-15 are assumed to be available.
   166  	switch {
   167  	case tags["v6.15"]:
   168  		return defaultCompiler
   169  	case tags["v5.9"]:
   170  		return "clang-15"
   171  	default:
   172  		// everything before v5.3 might not work great
   173  		// everything before v5.1 does not work
   174  		version = "9.0.1"
   175  	}
   176  	return filepath.Join(binDir, "llvm-"+version, "bin", "clang")
   177  }
   178  
   179  func linuxGCCPath(tags map[string]bool, binDir, defaultCompiler string) string {
   180  	version := ""
   181  	switch {
   182  	case tags["v5.16"]:
   183  		// Verified to work with 15.0.7.
   184  		return defaultCompiler
   185  	case tags["v5.9"]:
   186  		version = "10.1.0"
   187  	case tags["v4.12"]:
   188  		version = "8.1.0"
   189  	case tags["v4.11"]:
   190  		version = "7.3.0"
   191  	default:
   192  		version = "5.5.0"
   193  	}
   194  	return filepath.Join(binDir, "gcc-"+version, "bin", "gcc")
   195  }
   196  
   197  func (ctx *linux) PrepareBisect() error {
   198  	if ctx.vmType != targets.GVisor {
   199  		// Some linux repos we fuzz don't import the upstream release git tags. We need tags
   200  		// to decide which compiler versions to use. Let's fetch upstream for its tags.
   201  		err := ctx.gitRepo.fetchRemote("https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git", "")
   202  		if err != nil {
   203  			return fmt.Errorf("fetching upstream linux failed: %w", err)
   204  		}
   205  	}
   206  	return nil
   207  }
   208  
   209  func (ctx *linux) Bisect(bad, good string, dt debugtracer.DebugTracer, pred func() (BisectResult,
   210  	error)) ([]*Commit, error) {
   211  	commits, err := ctx.gitRepo.Bisect(bad, good, dt, pred)
   212  	if len(commits) == 1 {
   213  		ctx.addMaintainers(commits[0])
   214  	}
   215  	return commits, err
   216  }
   217  
   218  func (ctx *linux) addMaintainers(com *Commit) {
   219  	if len(com.Recipients) > 2 {
   220  		return
   221  	}
   222  	mtrs := ctx.getMaintainers(com.Hash, false)
   223  	if len(mtrs) < 3 {
   224  		mtrs = ctx.getMaintainers(com.Hash, true)
   225  	}
   226  	com.Recipients = append(com.Recipients, mtrs...)
   227  	sort.Sort(com.Recipients)
   228  }
   229  
   230  func (ctx *linux) getMaintainers(hash string, blame bool) Recipients {
   231  	// See #1441 re --git-min-percent.
   232  	args := "git show " + hash + " | " +
   233  		filepath.FromSlash("scripts/get_maintainer.pl") +
   234  		" --git-min-percent=20"
   235  	if blame {
   236  		args += " --git-blame"
   237  	}
   238  	output, err := osutil.RunCmd(time.Minute, ctx.gitRepo.Dir, "bash", "-c", args)
   239  	if err != nil {
   240  		return nil
   241  	}
   242  	return ParseMaintainersLinux(output)
   243  }
   244  
   245  func ParseMaintainersLinux(text []byte) Recipients {
   246  	lines := strings.Split(string(text), "\n")
   247  	reRole := regexp.MustCompile(` \([^)]+\)$`)
   248  	var mtrs Recipients
   249  	// LMKL is To by default, but it changes to Cc if there's also a subsystem list.
   250  	lkmlType := To
   251  	foundLkml := false
   252  	for _, line := range lines {
   253  		role := reRole.FindString(line)
   254  		address := strings.Replace(line, role, "", 1)
   255  		addr, err := mail.ParseAddress(address)
   256  		if err != nil {
   257  			continue
   258  		}
   259  		var roleType RecipientType
   260  		if addr.Address == "linux-kernel@vger.kernel.org" {
   261  			foundLkml = true
   262  			continue
   263  		} else if strings.Contains(role, "list") {
   264  			lkmlType = Cc
   265  			roleType = To
   266  		} else if strings.Contains(role, "maintainer") || strings.Contains(role, "supporter") {
   267  			roleType = To
   268  		} else {
   269  			roleType = Cc // Reviewer or other role; default to Cc.
   270  		}
   271  		mtrs = append(mtrs, RecipientInfo{*addr, roleType})
   272  	}
   273  	if foundLkml {
   274  		mtrs = append(mtrs, RecipientInfo{mail.Address{Address: "linux-kernel@vger.kernel.org"}, lkmlType})
   275  	}
   276  	sort.Sort(mtrs)
   277  	return mtrs
   278  }
   279  
   280  var ErrBadKconfig = errors.New("failed to parse Kconfig")
   281  
   282  const configBisectTag = "# Minimized by syzkaller"
   283  
   284  // Minimize() attempts to drop Linux kernel configs that are unnecessary(*) for bug reproduction.
   285  // 1. Remove sanitizers that are not needed to trigger the target class of bugs.
   286  // 2. Disable unrelated kernel subsystems. This is done by bisecting config changes between
   287  // `original` and `baseline`.
   288  // (*) After an unnecessary config is deleted, we still have pred() == BisectBad.
   289  func (ctx *linux) Minimize(target *targets.Target, original, baseline []byte, types []crash.Type,
   290  	dt debugtracer.DebugTracer, pred func(test []byte) (BisectResult, error)) ([]byte, error) {
   291  	if bytes.HasPrefix(original, []byte(configBisectTag)) {
   292  		dt.Log("# configuration already minimized\n")
   293  		return original, nil
   294  	}
   295  	kconf, err := kconfig.Parse(target, filepath.Join(ctx.gitRepo.Dir, "Kconfig"))
   296  	if err != nil {
   297  		return nil, fmt.Errorf("%w: %w", ErrBadKconfig, err)
   298  	}
   299  	config, err := kconfig.ParseConfigData(original, "original")
   300  	if err != nil {
   301  		return nil, err
   302  	}
   303  	minimizeCtx := &minimizeLinuxCtx{
   304  		kconf:  kconf,
   305  		config: config,
   306  		pred: func(cfg *kconfig.ConfigFile) (bool, error) {
   307  			res, err := pred(serialize(cfg))
   308  			return res == BisectBad, err
   309  		},
   310  		transform: func(cfg *kconfig.ConfigFile) {
   311  			setLinuxTagConfigs(cfg, nil)
   312  		},
   313  		DebugTracer: dt,
   314  	}
   315  	if len(types) > 0 {
   316  		// Technically, as almost all sanitizers are Yes/No config options, we could have
   317  		// achieved this minimization simply by disabling them all in the baseline config.
   318  		// However, we are now trying to make the most out of the few config minimization
   319  		// iterations we're ready to make do during the bisection process.
   320  		// Since it's possible to quite reliably determine the needed and unneeded sanitizers
   321  		// just by looking at crash reports, let's prefer a more complicated logic over worse
   322  		// bisection results.
   323  		// Once we start doing proper config minimizations for every reproducer, we can delete
   324  		// most of the related code.
   325  		err := minimizeCtx.dropInstrumentation(types)
   326  		if err != nil {
   327  			return nil, err
   328  		}
   329  	}
   330  	if len(baseline) > 0 {
   331  		baselineConfig, err := kconfig.ParseConfigData(baseline, "baseline")
   332  		// If we fail to parse the baseline config proceed with original one as baseline config
   333  		// is an optional parameter.
   334  		if err != nil {
   335  			return nil, fmt.Errorf("%w: %w", ErrBadKconfig, err)
   336  		}
   337  		err = minimizeCtx.minimizeAgainst(baselineConfig)
   338  		if err != nil {
   339  			return nil, err
   340  		}
   341  	}
   342  	return minimizeCtx.getConfig(), nil
   343  }
   344  
   345  func serialize(cf *kconfig.ConfigFile) []byte {
   346  	return []byte(fmt.Sprintf("%v, rev: %v\n%s", configBisectTag, prog.GitRevision, cf.Serialize()))
   347  }
   348  
   349  type minimizeLinuxCtx struct {
   350  	kconf     *kconfig.KConfig
   351  	config    *kconfig.ConfigFile
   352  	pred      func(*kconfig.ConfigFile) (bool, error)
   353  	transform func(*kconfig.ConfigFile)
   354  	debugtracer.DebugTracer
   355  }
   356  
   357  func (ctx *minimizeLinuxCtx) minimizeAgainst(base *kconfig.ConfigFile) error {
   358  	base = base.Clone()
   359  	ctx.transform(base)
   360  	// Don't do too many minimization runs, it will make bug bisections too long.
   361  	// The purpose is only to reduce the number of build/boot/test errors due to bugs
   362  	// in unrelated parts of the kernel.
   363  	// Bisection is not getting much faster with smaller configs, only more reliable,
   364  	// so there's a trade-off. Try to do best in 5 iterations, that's about 1.5 hours.
   365  	const minimizeRuns = 5
   366  	minConfig, err := ctx.kconf.Minimize(base, ctx.config, ctx.runPred, minimizeRuns, ctx)
   367  	if err != nil {
   368  		return err
   369  	}
   370  	ctx.config = minConfig
   371  	return nil
   372  }
   373  
   374  func (ctx *minimizeLinuxCtx) dropInstrumentation(types []crash.Type) error {
   375  	ctx.Log("check whether we can drop unnecessary instrumentation")
   376  	oldTransform := ctx.transform
   377  	transform := func(c *kconfig.ConfigFile) {
   378  		oldTransform(c)
   379  		setLinuxSanitizerConfigs(c, types, ctx)
   380  	}
   381  	newConfig := ctx.config.Clone()
   382  	transform(newConfig)
   383  	if bytes.Equal(ctx.config.Serialize(), newConfig.Serialize()) {
   384  		ctx.Log("there was nothing we could disable; skip")
   385  		return nil
   386  	}
   387  	ctx.SaveFile("no-instrumentation.config", newConfig.Serialize())
   388  	ok, err := ctx.runPred(newConfig)
   389  	if err != nil {
   390  		return err
   391  	}
   392  	if ok {
   393  		ctx.Log("the bug reproduces without the instrumentation")
   394  		ctx.transform = transform
   395  		ctx.config = newConfig
   396  	}
   397  	return nil
   398  }
   399  
   400  func (ctx *minimizeLinuxCtx) runPred(cfg *kconfig.ConfigFile) (bool, error) {
   401  	cfg = cfg.Clone()
   402  	ctx.transform(cfg)
   403  	return ctx.pred(cfg)
   404  }
   405  
   406  func (ctx *minimizeLinuxCtx) getConfig() []byte {
   407  	ctx.transform(ctx.config)
   408  	return serialize(ctx.config)
   409  }