github.com/google/syzkaller@v0.0.0-20240517125934-c0f1611a36d6/dashboard/app/tree.go (about)

     1  // Copyright 2023 syzkaller project authors. All rights reserved.
     2  // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
     3  
     4  package main
     5  
     6  // Given information on how commits flow from one kernel source tree to another, assign
     7  // bugs labels of two kinds:
     8  // a) LabelIntroduced -- reproducer does not work in any other kernel tree, FROM which commits flow.
     9  // b) LabelReached -- reproducer does not work in any other kernel tree, TO which commits flow.
    10  
    11  import (
    12  	"context"
    13  	"fmt"
    14  	"sort"
    15  	"sync"
    16  	"time"
    17  
    18  	"github.com/google/syzkaller/dashboard/dashapi"
    19  	"golang.org/x/sync/errgroup"
    20  	db "google.golang.org/appengine/v2/datastore"
    21  	"google.golang.org/appengine/v2/log"
    22  )
    23  
    24  // generateTreeOriginJobs generates new jobs for bug origin tree determination.
    25  func generateTreeOriginJobs(cGlobal context.Context, bugKey *db.Key,
    26  	managers map[string]dashapi.ManagerJobs) (*Job, *db.Key, error) {
    27  	var job *Job
    28  	var jobKey *db.Key
    29  	tx := func(c context.Context) error {
    30  		bug := new(Bug)
    31  		if err := db.Get(c, bugKey, bug); err != nil {
    32  			return fmt.Errorf("failed to get bug: %w", err)
    33  		}
    34  		ctx := &bugTreeContext{
    35  			c:       c,
    36  			cGlobal: cGlobal,
    37  			bug:     bug,
    38  			bugKey:  bug.key(c),
    39  		}
    40  		ret := ctx.pollBugTreeJobs(managers)
    41  		switch ret.(type) {
    42  		case pollResultError:
    43  			return ret.(error)
    44  		case pollResultWait:
    45  			newTime, ok := ret.(time.Time)
    46  			if ok && newTime.After(bug.TreeTests.NextPoll) {
    47  				bug.TreeTests.NextPoll = newTime
    48  			}
    49  		}
    50  		bug.TreeTests.NeedPoll = false
    51  		if _, err := db.Put(c, bugKey, bug); err != nil {
    52  			return fmt.Errorf("failed to put bug: %w", err)
    53  		}
    54  		job, jobKey = ctx.job, ctx.jobKey
    55  		return nil
    56  	}
    57  	if err := db.RunInTransaction(cGlobal, tx,
    58  		&db.TransactionOptions{XG: true, Attempts: 10}); err != nil {
    59  		return nil, nil, err
    60  	}
    61  	return job, jobKey, nil
    62  }
    63  
// treeOriginJobDone is supposed to be called when tree origin job is done.
// It keeps the cached info in Bug up to date and assigns bug tree origin labels.
func treeOriginJobDone(cGlobal context.Context, jobKey *db.Key, job *Job) error {
	bugKey := jobKey.Parent()
	tx := func(c context.Context) error {
		bug := new(Bug)
		if err := db.Get(c, bugKey, bug); err != nil {
			return fmt.Errorf("failed to get bug: %w", err)
		}
		// Re-run the polling logic for the finished job's manager only,
		// with new job creation disabled: we just want the cached TreeTests
		// state and the labels to absorb the job's result.
		ctx := &bugTreeContext{
			c:         c,
			cGlobal:   cGlobal,
			bug:       bug,
			bugKey:    bug.key(c),
			noNewJobs: true,
		}
		ret := ctx.pollBugTreeJobs(
			map[string]dashapi.ManagerJobs{job.Manager: {TestPatches: true}},
		)
		switch ret.(type) {
		case pollResultError:
			return ret.(error)
		case pollResultPending:
			// More jobs could be started now; request an immediate repoll.
			bug.TreeTests.NextPoll = time.Time{}
			bug.TreeTests.NeedPoll = true
		}
		if _, err := db.Put(c, bugKey, bug); err != nil {
			return fmt.Errorf("failed to put bug: %w", err)
		}
		return nil
	}
	return db.RunInTransaction(cGlobal, tx, &db.TransactionOptions{XG: true, Attempts: 10})
}
    97  
// pollTreeJobResult is a marker interface for the possible outcomes of the
// tree origin polling steps below.
type pollTreeJobResult interface{}

// pollResultPending is returned when we wait some job to finish.
type pollResultPending struct{}

// pollResultWait is returned when we know the next time the process could be repeated.
type pollResultWait time.Time

// pollResultSkip means that there are no poll jobs we could run at the moment.
// It's impossible to say when it changes, so it's better not to repeat polling soon.
type pollResultSkip struct{}

// pollResultError wraps an error that aborted the polling step.
type pollResultError error

// pollResultDone is returned when a matching finished job was found.
type pollResultDone struct {
	Crashed  bool
	Finished time.Time
}
   116  
// bugTreeContext carries the state shared by the tree origin determination
// steps for a single bug.
type bugTreeContext struct {
	c context.Context
	// Datastore puts limits on how often a single entity can be accessed by transactions.
	// And we actually don't always need a consistent view of the DB, we just want to query
	// a single entity. So, when possible, let's make queries outside of a transaction.
	cGlobal context.Context
	// crash/crashKey/build describe the crash whose repro is used for tree tests
	// (selected by loadCrashInfo).
	crash    *Crash
	crashKey *db.Key
	bugKey   *db.Key
	bug      *Bug
	build    *Build
	// repoNode is the node of the commit flow graph on which the crash happened.
	repoNode *repoNode
	// noNewJobs makes doRunRepro only consult existing results instead of
	// scheduling new patch testing jobs.
	noNewJobs bool

	// If any jobs were created, here'll be one of them.
	job    *Job
	jobKey *db.Key
}
   135  
// pollBugTreeJobs runs one iteration of the tree origin determination process:
// it picks the crash to work with, refreshes the cached per-tree test results
// and attempts to assign the origin and missing-backport labels.
func (ctx *bugTreeContext) pollBugTreeJobs(managers map[string]dashapi.ManagerJobs) pollTreeJobResult {
	// Determine the crash we'd stick to.
	err := ctx.loadCrashInfo()
	if err != nil {
		log.Errorf(ctx.c, "bug %q: failed to load crash info: %s", ctx.bug.displayTitle(), err)
		return pollResultError(err)
	}
	if ctx.crash == nil {
		// There are no crashes we could further work with.
		// TODO: consider looking at the recent repro retest results.
		log.Infof(ctx.c, "bug %q: no suitable crash", ctx.bug.displayTitle())
		return pollResultSkip{}
	}
	if ctx.repoNode == nil {
		// We have no information about the tree on which the bug happened.
		log.Errorf(ctx.c, "bug %q: no information about the tree", ctx.bug.displayTitle())
		return pollResultSkip{}
	}
	if !managers[ctx.crash.Manager].TestPatches {
		// Patch testing is not enabled for the crash's manager.
		return pollResultSkip{}
	}
	if len(ctx.bug.TreeTests.List) > 0 && ctx.crashKey.IntID() != ctx.bug.TreeTests.List[0].CrashID {
		// Clean up old job records, they are no longer relevant.
		ctx.bug.TreeTests.List = nil
	}
	// Absorb the results of finished pending jobs into the cached state.
	for i := range ctx.bug.TreeTests.List {
		err := ctx.bug.TreeTests.List[i].applyPending(ctx.c)
		if err != nil {
			return pollResultError(err)
		}
	}
	return ctx.groupResults([]pollTreeJobResult{
		ctx.setOriginLabels(),
		ctx.missingBackports(),
	})
}
   172  
// setOriginLabels runs the reproducer on the trees related to the bug's one
// and, once enough results are collected, assigns the LabelIntroduced and
// LabelReached origin labels via selectRepoLabels.
func (ctx *bugTreeContext) setOriginLabels() pollTreeJobResult {
	// Don't touch labels if the user has already set them manually.
	if !ctx.labelsCanBeSet() || ctx.bug.HasUserLabel(OriginLabel) {
		return pollResultSkip{}
	}
	// Origin labels are recalculated from scratch on every run.
	ctx.bug.UnsetLabels(OriginLabel)

	var results []pollTreeJobResult
	perNode := map[*repoNode]pollTreeJobResult{}
	for node, merge := range ctx.repoNode.allReachable() {
		var result pollTreeJobResult
		if merge {
			// Merge base gives a much better result quality, so use it whenever possible.
			result = ctx.runRepro(node.repo, wantFirstAny{}, runOnMergeBase{
				Repo:   ctx.build.KernelRepo,
				Branch: ctx.build.KernelBranch,
			})
		} else {
			result = ctx.runRepro(node.repo, wantFirstAny{}, runOnHEAD{})
		}
		perNode[node] = result
		results = append(results, result)
	}
	result := ctx.groupResults(results)
	if _, ok := result.(pollResultPending); ok {
		// At least wait until all started jobs have finished (successfully or not).
		return result
	}
	lastDone := ctx.lastDone(results)
	if lastDone.IsZero() {
		// Demand that at least one of the finished jobs has finished successfully.
		return pollResultSkip{}
	}
	// Since we have a repro for it, it definitely crashed at some point.
	perNode[ctx.repoNode] = pollResultDone{Crashed: true}
	allLabels := append(ctx.selectRepoLabels(true, perNode), ctx.selectRepoLabels(false, perNode)...)
	for _, label := range allLabels {
		if label == ctx.repoNode.repo.LabelIntroduced || label == ctx.repoNode.repo.LabelReached {
			// It looks like our reproducer does not work on other trees.
			// Just in case verify that it still works on the original one.
			result := ctx.runRepro(ctx.repoNode.repo, wantNewAny(lastDone), runOnHEAD{})
			resultDone, ok := result.(pollResultDone)
			if !ok {
				return result
			}
			if !resultDone.Crashed {
				// Unfortunately the repro no longer works. Don't assign labels.
				return pollResultSkip{}
			}
		}
	}
	var labels []BugLabel
	for _, label := range allLabels {
		labels = append(labels, BugLabel{Label: OriginLabel, Value: label})
	}
	ctx.bug.SetLabels(makeLabelSet(ctx.c, ctx.bug.Namespace), labels)
	return pollResultSkip{}
}
   230  
// selectRepoLabels attributes bugs to trees depending on the patch testing results.
// in=true selects LabelIntroduced candidates, in=false -- LabelReached ones.
func (ctx *bugTreeContext) selectRepoLabels(in bool, results map[*repoNode]pollTreeJobResult) []string {
	// Collect whether the repro crashed on each node with a finished result.
	crashed := map[*repoNode]bool{}
	for node, result := range results {
		done, ok := result.(pollResultDone)
		if ok {
			crashed[node] = done.Crashed
		}
	}
	// Filter out the nodes that cannot carry the label.
	for node := range crashed {
		if !crashed[node] {
			continue
		}
		// (1) The in = true case:
		// If, for a tree X, there's a tree Y from which commits flow to X and the reproducer crashed
		// on Y, X cannot be among bug origin trees.
		// (2) The in = false case:
		// If, for a tree X, there's a tree Y to which commits flow from X and the reproducer crashed
		// on Y, X cannot be the last tree to which the bug has spread.
		// Reachability is transitive, so the order of map iteration doesn't matter.
		for otherNode := range node.reachable(!in) {
			crashed[otherNode] = false
		}
	}
	ret := []string{}
	for node, set := range crashed {
		if !set {
			continue
		}
		if in && node.repo.LabelIntroduced != "" {
			ret = append(ret, node.repo.LabelIntroduced)
		} else if !in && node.repo.LabelReached != "" {
			ret = append(ret, node.repo.LabelReached)
		}
	}
	return ret
}
   267  
   268  // Test if there's any sense in testing other trees.
   269  // For example, if we hit a bug on a mainline, there's no sense to test linux-next to check
   270  // if it's a linux-next bug.
   271  func (ctx *bugTreeContext) labelsCanBeSet() bool {
   272  	for node := range ctx.repoNode.reachable(true) {
   273  		if node.repo.LabelIntroduced != "" {
   274  			return true
   275  		}
   276  	}
   277  	for node := range ctx.repoNode.reachable(false) {
   278  		if node.repo.LabelReached != "" {
   279  			return true
   280  		}
   281  	}
   282  	return ctx.repoNode.repo.LabelIntroduced != "" ||
   283  		ctx.repoNode.repo.LabelReached != ""
   284  }
   285  
// missingBackports tries to detect the "missing backport" situation: some tree
// from which commits flow to ours used to crash with the reproducer, but no
// longer does, while our own tree still crashes on HEAD.
func (ctx *bugTreeContext) missingBackports() pollTreeJobResult {
	if !ctx.repoNode.repo.DetectMissingBackports || ctx.bug.HasUserLabel(MissingBackportLabel) {
		// Detection is disabled for the tree or the user manages the label manually.
		return pollResultSkip{}
	}
	var okDate time.Time
	results := []pollTreeJobResult{}
	for node, merge := range ctx.repoNode.reachable(true) {
		// First check whether the reproducer no longer crashes the upstream tree.
		resultOK := ctx.runRepro(node.repo, wantFirstOK{}, runOnHEAD{})
		doneOK, ok := resultOK.(pollResultDone)
		if !ok {
			results = append(results, resultOK)
			continue
		}
		// Now make sure it crashed that tree at some point in the past.
		var resultCrash pollTreeJobResult
		if merge {
			resultCrash = ctx.runRepro(node.repo, wantFirstAny{}, runOnMergeBase{
				Repo:   ctx.build.KernelRepo,
				Branch: ctx.build.KernelBranch,
			})
		} else {
			// We already know that the reproducer doesn't crash the tree.
			// There'd be no sense to call runRepro in the hope of getting a crash,
			// so let's just look into the past tree testing results.
			resultCrash, _ = ctx.bug.findResult(ctx.c, node.repo, wantFirstCrash{}, runOnAny{})
		}
		doneCrash, ok := resultCrash.(pollResultDone)
		if !ok {
			results = append(results, resultCrash)
			continue
		} else if merge && doneCrash.Crashed || doneOK.Finished.After(doneCrash.Finished) {
			// That's what we want: earlier it crashed and then stopped.
			okDate = doneOK.Finished
			break
		}
	}
	if okDate.IsZero() {
		// No evidence of a missing backport (yet).
		return ctx.groupResults(results)
	}
	// We are about to assign the "missing backport" label.
	// To reduce the number of backports, just in case run once more on HEAD.
	// The bug fix could have already reached the repository.
	result := ctx.runRepro(ctx.repoNode.repo, wantNewAny(okDate), runOnHEAD{})
	resultDone, ok := result.(pollResultDone)
	if !ok {
		return result
	}
	ctx.bug.UnsetLabels(MissingBackportLabel)
	if resultDone.Crashed {
		ctx.bug.SetLabels(makeLabelSet(ctx.c, ctx.bug.Namespace), []BugLabel{
			{Label: MissingBackportLabel},
		})
	}
	return pollResultSkip{}
}
   340  
   341  func (ctx *bugTreeContext) lastDone(results []pollTreeJobResult) time.Time {
   342  	var maxTime time.Time
   343  	for _, item := range results {
   344  		done, ok := item.(pollResultDone)
   345  		if !ok {
   346  			continue
   347  		}
   348  		if done.Finished.After(maxTime) {
   349  			maxTime = done.Finished
   350  		}
   351  	}
   352  	return maxTime
   353  }
   354  
   355  func (ctx *bugTreeContext) groupResults(results []pollTreeJobResult) pollTreeJobResult {
   356  	var minWait time.Time
   357  	for _, result := range results {
   358  		switch v := result.(type) {
   359  		case pollResultPending, pollResultError:
   360  			// Wait for the job result to continue.
   361  			return result
   362  		case pollResultWait:
   363  			t := time.Time(v)
   364  			if minWait.IsZero() || minWait.After(t) {
   365  				minWait = t
   366  			}
   367  		}
   368  	}
   369  	if !minWait.IsZero() {
   370  		return pollResultWait(minWait)
   371  	}
   372  	return pollResultSkip{}
   373  }
   374  
// expectedResult restricts which past test results findResult may return.
type expectedResult interface{}

// resultFreshness subtypes.
type wantFirstOK struct{}    // the first finished run that did not crash
type wantFirstCrash struct{} // the first finished run that crashed
type wantFirstAny struct{}   // the first finished run, crashed or not
type wantNewAny time.Time    // any run that finished after the given moment

// runReproOn specifies on which kernel tree state to run the reproducer.
type runReproOn interface{}

// runReproOn subtypes.
type runOnAny struct{} // attempts to find any result, if unsuccessful, runs on HEAD
type runOnHEAD struct{}
type runOnMergeBase struct {
	Repo   string
	Branch string
}
   392  
   393  func (ctx *bugTreeContext) runRepro(repo KernelRepo, result expectedResult, runOn runReproOn) pollTreeJobResult {
   394  	ret := ctx.doRunRepro(repo, result, runOn)
   395  	log.Infof(ctx.c, "runRepro on %s, %T, %T: %#v", repo.Alias, result, runOn, ret)
   396  	return ret
   397  }
   398  
// doRunRepro returns a cached patch testing result that matches the query or,
// if there is none, schedules a new patch testing job (unless ctx.noNewJobs
// is set) and returns pollResultPending.
func (ctx *bugTreeContext) doRunRepro(repo KernelRepo, result expectedResult, runOn runReproOn) pollTreeJobResult {
	existingResult, _ := ctx.bug.findResult(ctx.c, repo, result, runOn)
	if _, ok := existingResult.(pollResultSkip); !ok {
		// A cached result (or an error/pending state) is already available.
		return existingResult
	}
	// Okay, nothing suitable was found. We need to set up a new job.
	if ctx.noNewJobs {
		return pollResultPending{}
	}
	// First check if there's existing BugTreeTest object.
	if _, ok := runOn.(runOnAny); ok {
		// New jobs always run on HEAD.
		runOn = runOnHEAD{}
	}
	candidates := ctx.bug.matchingTreeTests(repo, runOn)
	var bugTreeTest *BugTreeTest
	if len(candidates) > 0 {
		bugTreeTest = &ctx.bug.TreeTests.List[candidates[0]]
	} else {
		item := BugTreeTest{
			CrashID: ctx.crashKey.IntID(),
			Repo:    repo.URL,
			Branch:  repo.Branch,
		}
		if v, ok := runOn.(runOnMergeBase); ok {
			item.MergeBaseRepo = v.Repo
			item.MergeBaseBranch = v.Branch
		}
		ctx.bug.TreeTests.List = append(ctx.bug.TreeTests.List, item)
		// Take the pointer only after the append above, which may reallocate.
		bugTreeTest = &ctx.bug.TreeTests.List[len(ctx.bug.TreeTests.List)-1]
	}

	// Rate-limit retries after errors and repetitions after successful runs.
	if bugTreeTest.Error != "" {
		const errorRetryTime = 24 * time.Hour * 14
		result := ctx.ensureRepeatPeriod(bugTreeTest.Error, errorRetryTime)
		if _, ok := result.(pollResultSkip); !ok {
			return result
		}
		// The error is old enough, allow a retry.
		bugTreeTest.Error = ""
	}
	if bugTreeTest.Last != "" {
		const fixRetryTime = 24 * time.Hour * 45
		result := ctx.ensureRepeatPeriod(bugTreeTest.Last, fixRetryTime)
		if _, ok := result.(pollResultSkip); !ok {
			return result
		}
	}
	var err error
	ctx.job, ctx.jobKey, err = addTestJob(ctx.c, &testJobArgs{
		crash:         ctx.crash,
		crashKey:      ctx.crashKey,
		configRef:     ctx.build.KernelConfig,
		configAppend:  repo.AppendConfig,
		inTransaction: true,
		treeOrigin:    true,
		testReqArgs: testReqArgs{
			bug:             ctx.bug,
			bugKey:          ctx.bugKey,
			repo:            bugTreeTest.Repo,
			branch:          bugTreeTest.Branch,
			mergeBaseRepo:   bugTreeTest.MergeBaseRepo,
			mergeBaseBranch: bugTreeTest.MergeBaseBranch,
		},
	})
	if err != nil {
		return pollResultError(err)
	}
	// Remember the pending job; applyPending() will pick up its result later.
	bugTreeTest.Pending = ctx.jobKey.Encode()
	return pollResultPending{}
}
   468  
   469  func (ctx *bugTreeContext) ensureRepeatPeriod(jobKey string, period time.Duration) pollTreeJobResult {
   470  	job, _, err := fetchJob(ctx.c, jobKey)
   471  	if err != nil {
   472  		return pollResultError(err)
   473  	}
   474  	timePassed := timeNow(ctx.c).Sub(job.Finished)
   475  	if timePassed < period {
   476  		return pollResultWait(job.Finished.Add(period))
   477  	}
   478  	return pollResultSkip{}
   479  }
   480  
// findResult looks through the bug's cached tree test records for a finished
// job that matches the repo, the run target and the expected result kind.
// It returns pollResultDone (plus the job) on success, pollResultPending if
// a matching job is still running, and pollResultSkip otherwise.
func (bug *Bug) findResult(c context.Context,
	repo KernelRepo, result expectedResult, runOn runReproOn) (pollTreeJobResult, *Job) {
	anyPending := false
	for _, i := range bug.matchingTreeTests(repo, runOn) {
		info := &bug.TreeTests.List[i]
		anyPending = anyPending || info.Pending != ""
		// Pick the job key that corresponds to the requested result kind.
		key := ""
		switch result.(type) {
		case wantFirstOK:
			key = info.FirstOK
		case wantFirstCrash:
			key = info.FirstCrash
		case wantFirstAny:
			key = info.First
		case wantNewAny:
			key = info.Last
		default:
			return pollResultError(fmt.Errorf("unexpected expected result: %T", result)), nil
		}
		if key == "" {
			// This record has no finished job of the requested kind.
			continue
		}
		job, _, err := fetchJob(c, key)
		if err != nil {
			return pollResultError(err), nil
		}
		if date, ok := result.(wantNewAny); ok {
			// Ignore the results that are older than the requested moment.
			if job.Finished.Before(time.Time(date)) {
				continue
			}
		}
		return pollResultDone{
			Crashed:  job.CrashTitle != "",
			Finished: job.Finished,
		}, job
	}
	if anyPending {
		return pollResultPending{}, nil
	} else {
		return pollResultSkip{}, nil
	}
}
   523  
   524  func (bug *Bug) matchingTreeTests(repo KernelRepo, runOn runReproOn) []int {
   525  	ret := []int{}
   526  	for i, item := range bug.TreeTests.List {
   527  		if item.Repo != repo.URL {
   528  			continue
   529  		}
   530  		ok := true
   531  		switch v := runOn.(type) {
   532  		case runOnHEAD:
   533  			// TODO: should we check for an empty merge base here?
   534  			ok = item.Branch == repo.Branch
   535  		case runOnMergeBase:
   536  			ok = item.Branch == repo.Branch &&
   537  				item.MergeBaseRepo == v.Repo &&
   538  				item.MergeBaseBranch == v.Branch
   539  		}
   540  		if ok {
   541  			ret = append(ret, i)
   542  		}
   543  	}
   544  	return ret
   545  }
   546  
// loadCrashInfo picks the crash (and its build and repo graph node) on which
// the tree origin jobs will be based. After a successful return ctx.crash may
// still be nil if no crash with a usable repro exists.
func (ctx *bugTreeContext) loadCrashInfo() error {
	// First look at the crash from previous tests.
	// Sticking to the same crash keeps the test series consistent.
	if len(ctx.bug.TreeTests.List) > 0 {
		crashID := ctx.bug.TreeTests.List[len(ctx.bug.TreeTests.List)-1].CrashID
		crashKey := db.NewKey(ctx.c, "Crash", "", crashID, ctx.bugKey)
		crash := new(Crash)
		// We need to also tolerate the case when the crash was just deleted.
		err := db.Get(ctx.cGlobal, crashKey, crash)
		if err != nil && err != db.ErrNoSuchEntity {
			return fmt.Errorf("failed to get crash: %w", err)
		} else if err == nil {
			ok, build, err := ctx.isCrashRelevant(crash)
			if err != nil {
				return err
			}
			if ok {
				ctx.build = build
				ctx.crash = crash
				ctx.crashKey = crashKey
			}
		}
	}

	// Query the most relevant crash with repro.
	crash, crashKey, err := findCrashForBug(ctx.cGlobal, ctx.bug)
	if err != nil {
		return err
	}
	ok, build, err := ctx.isCrashRelevant(crash)
	if err != nil {
		return err
	} else if ok && (ctx.crash == nil || crash.ReportLen > ctx.crash.ReportLen) {
		// Update the crash only if we found a better one.
		ctx.build = build
		ctx.crash = crash
		ctx.crashKey = crashKey
	}
	// Load the rest of the data.
	if ctx.crash != nil {
		var err error
		ns := ctx.bug.Namespace
		repoGraph, err := makeRepoGraph(getNsConfig(ctx.c, ns).Repos)
		if err != nil {
			return err
		}
		// repoNode stays nil if the build's repo/branch is not in the graph.
		ctx.repoNode = repoGraph.nodeByRepo(ctx.build.KernelRepo, ctx.build.KernelBranch)
	}
	return nil
}
   596  
// isCrashRelevant determines whether the crash is still usable for tree origin
// testing and, if it is, returns the build on which it happened.
func (ctx *bugTreeContext) isCrashRelevant(crash *Crash) (bool, *Build, error) {
	if crash.ReproIsRevoked {
		// No sense in running the reproducer.
		return false, nil, nil
	} else if crash.ReproC == 0 && crash.ReproSyz == 0 {
		// Let's wait for the repro.
		return false, nil, nil
	}
	newManager, _ := activeManager(ctx.cGlobal, crash.Manager, ctx.bug.Namespace)
	if newManager != crash.Manager {
		// The manager was deprecated since the crash.
		// Let's just ignore such bugs for now.
		return false, nil, nil
	}
	build, err := loadBuild(ctx.cGlobal, ctx.bug.Namespace, crash.BuildID)
	if err != nil {
		return false, nil, err
	}
	mgrBuild, err := lastManagerBuild(ctx.cGlobal, build.Namespace, newManager)
	if err != nil {
		return false, build, err
	}
	// It does happen that we sometimes update the tested tree.
	// It's not frequent at all, but it will make all results very confusing.
	return build.KernelRepo == mgrBuild.KernelRepo &&
		build.KernelBranch == mgrBuild.KernelBranch, build, nil
}
   624  
   625  func (test *BugTreeTest) applyPending(c context.Context) error {
   626  	if test.Pending == "" {
   627  		return nil
   628  	}
   629  	job, _, err := fetchJob(c, test.Pending)
   630  	if err != nil {
   631  		return err
   632  	}
   633  	if job.Finished.IsZero() {
   634  		// Not yet ready.
   635  		return nil
   636  	}
   637  	pendingKey := test.Pending
   638  	test.Pending = ""
   639  	if job.Error != 0 {
   640  		test.Error = pendingKey
   641  		return nil
   642  	}
   643  	test.Last = pendingKey
   644  	if test.First == "" {
   645  		test.First = pendingKey
   646  	}
   647  	if test.FirstOK == "" && job.CrashTitle == "" {
   648  		test.FirstOK = pendingKey
   649  	} else if test.FirstCrash == "" && job.CrashTitle != "" {
   650  		test.FirstCrash = pendingKey
   651  	}
   652  	return nil
   653  }
   654  
   655  // treeTestJobs fetches relevant tree testing results.
   656  func treeTestJobs(c context.Context, bug *Bug) ([]*dashapi.JobInfo, error) {
   657  	g, _ := errgroup.WithContext(context.Background())
   658  	jobIDs := make(chan string)
   659  
   660  	var ret []*dashapi.JobInfo
   661  	var mu sync.Mutex
   662  
   663  	// The underlying code makes a number of queries, so let's do it in parallel to speed up processing.
   664  	const threads = 3
   665  	for i := 0; i < threads; i++ {
   666  		g.Go(func() error {
   667  			for id := range jobIDs {
   668  				job, jobKey, err := fetchJob(c, id)
   669  				if err != nil {
   670  					return err
   671  				}
   672  				build, err := loadBuild(c, job.Namespace, job.BuildID)
   673  				if err != nil {
   674  					return err
   675  				}
   676  				crashKey := db.NewKey(c, "Crash", "", job.CrashID, bug.key(c))
   677  				crash := new(Crash)
   678  				if err := db.Get(c, crashKey, crash); err != nil {
   679  					return fmt.Errorf("failed to get crash: %w", err)
   680  				}
   681  				info := makeJobInfo(c, job, jobKey, bug, build, crash)
   682  				mu.Lock()
   683  				ret = append(ret, info)
   684  				mu.Unlock()
   685  			}
   686  			return nil
   687  		})
   688  	}
   689  	for _, info := range bug.TreeTests.List {
   690  		if info.FirstOK != "" {
   691  			jobIDs <- info.FirstOK
   692  		}
   693  		if info.FirstCrash != "" {
   694  			jobIDs <- info.FirstCrash
   695  		}
   696  		if info.Error != "" {
   697  			jobIDs <- info.Error
   698  		}
   699  	}
   700  	// Wait until we have all information.
   701  	close(jobIDs)
   702  	err := g.Wait()
   703  	if err != nil {
   704  		return nil, err
   705  	}
   706  	// Sort structures to keep output consistent.
   707  	sort.Slice(ret, func(i, j int) bool {
   708  		if ret[i].KernelAlias != ret[j].KernelAlias {
   709  			return ret[i].KernelAlias < ret[j].KernelAlias
   710  		}
   711  		return ret[i].Finished.Before(ret[j].Finished)
   712  	})
   713  	return ret, nil
   714  }
   715  
// Create a cross-tree bisection job (if needed).
// Returns:
// a) Job object and its key -- in case of success.
// b) Whether the lookup was expensive (it can help optimize crossTreeBisection calls).
func crossTreeBisection(c context.Context, bug *Bug,
	managers map[string]dashapi.ManagerJobs) (*Job, *db.Key, bool, error) {
	repoGraph, err := makeRepoGraph(getNsConfig(c, bug.Namespace).Repos)
	if err != nil {
		return nil, nil, false, err
	}
	// The list of existing fix bisection jobs is loaded lazily -- only if
	// at least one edge actually gets that far.
	bugJobs := &lazyJobList{
		c:       c,
		bug:     bug,
		jobType: JobBisectFix,
	}
	var job *Job
	var jobKey *db.Key
	expensive := false
	err = repoGraph.forEachEdge(func(from, to *repoNode, info KernelRepoLink) error {
		if jobKey != nil {
			// A job was already created for an earlier edge; do nothing more.
			return nil
		}
		if !info.BisectFixes {
			return nil
		}
		expensive = true
		log.Infof(c, "%s: considering cross-tree bisection %s/%s",
			bug.displayTitle(), from.repo.Alias, to.repo.Alias)
		_, crashJob := bug.findResult(c, to.repo, wantNewAny{}, runOnHEAD{})
		if crashJob == nil {
			// No patch testing was performed yet.
			return nil
		}
		if crashJob.CrashTitle == "" {
			// The bug is already fixed on the target tree.
			return nil
		}
		crashBuild, err := loadBuild(c, bug.Namespace, crashJob.BuildID)
		if err != nil {
			return err
		}
		manager, _ := activeManager(c, crashJob.Manager, crashJob.Namespace)
		if !managers[manager].BisectFix {
			// Fix bisections are not enabled for this manager.
			return nil
		}
		_, successJob := bug.findResult(c, from.repo, wantNewAny{}, runOnHEAD{})
		if successJob == nil {
			// The jobs is not done yet.
			return nil
		}
		if successJob.CrashTitle != "" {
			// The kernel tree is still crashed by the repro.
			return nil
		}
		// The repro crashes `to` but not `from`: bisect the fix between
		// the crashing commit and the HEAD of `from`.
		newJob := &Job{
			Type:            JobBisectFix,
			Created:         timeNow(c),
			Namespace:       bug.Namespace,
			Manager:         crashJob.Manager,
			BisectFrom:      crashBuild.KernelCommit,
			KernelRepo:      from.repo.URL,
			KernelBranch:    from.repo.Branch,
			MergeBaseRepo:   to.repo.URL,
			MergeBaseBranch: to.repo.Branch,
			BugTitle:        bug.displayTitle(),
			CrashID:         crashJob.CrashID,
		}
		// It's expected that crossTreeBisection is not concurrently called with the same
		// manager list.
		prevJob, err := bugJobs.lastMatch(newJob)
		if err != nil {
			return err
		}
		const repeatPeriod = time.Hour * 24 * 30
		if prevJob != nil && (prevJob.Error == 0 ||
			prevJob.Finished.After(timeNow(c).Add(-repeatPeriod))) {
			// The job is already pending or failed recently. Skip.
			return nil
		}
		job = newJob
		jobKey, err = saveJob(c, newJob, bug.key(c))
		return err
	})
	return job, jobKey, expensive, err
}
   801  
// lazyJobList lazily loads the list of the bug's jobs of a particular type.
type lazyJobList struct {
	c       context.Context
	bug     *Bug
	jobType JobType
	jobs    *bugJobs // nil until the first lastMatch call
}
   808  
   809  func (list *lazyJobList) lastMatch(job *Job) (*Job, error) {
   810  	if list.jobs == nil {
   811  		var err error
   812  		list.jobs, err = queryBugJobs(list.c, list.bug, list.jobType)
   813  		if err != nil {
   814  			return nil, err
   815  		}
   816  	}
   817  	var best *Job
   818  	for _, item := range list.jobs.all() {
   819  		otherJob := item.job
   820  		same := otherJob.Manager == job.Manager &&
   821  			otherJob.KernelRepo == job.KernelRepo &&
   822  			otherJob.KernelBranch == job.KernelBranch &&
   823  			otherJob.CrashID == job.CrashID &&
   824  			otherJob.MergeBaseRepo == job.MergeBaseRepo &&
   825  			otherJob.MergeBaseBranch == job.MergeBaseBranch
   826  		if !same {
   827  			continue
   828  		}
   829  		if best == nil || best.Created.Before(otherJob.Created) {
   830  			best = otherJob
   831  		}
   832  	}
   833  	return best, nil
   834  }
   835  
   836  func doneCrossTreeBisection(c context.Context, jobKey *db.Key, job *Job) error {
   837  	if job.Type != JobBisectFix || job.MergeBaseRepo == "" {
   838  		// Not a cross tree bisection.
   839  		return nil
   840  	}
   841  	if job.Error != 0 || job.isUnreliableBisect() || len(job.Commits) != 1 {
   842  		// The result is not interesting.
   843  		return nil
   844  	}
   845  	return updateSingleBug(c, jobKey.Parent(), func(bug *Bug) error {
   846  		bug.FixCandidateJob = jobKey.Encode()
   847  		return nil
   848  	})
   849  }
   850  
// repoNode is a vertex of the commit flow graph; one node per kernel repo.
type repoNode struct {
	repo  KernelRepo
	edges []repoEdge
}

// repoEdge is a directed commit flow link attached to a node.
type repoEdge struct {
	// in is true when commits flow from other to the owning node,
	// false when they flow from the owning node to other.
	in    bool
	info  KernelRepoLink
	other *repoNode
}

// repoGraph describes how commits flow between the kernel repos.
type repoGraph struct {
	// nodes is keyed by repo alias (see makeRepoGraph).
	nodes map[string]*repoNode
}
   865  
   866  func makeRepoGraph(repos []KernelRepo) (*repoGraph, error) {
   867  	g := &repoGraph{
   868  		nodes: map[string]*repoNode{},
   869  	}
   870  	for _, repo := range repos {
   871  		if repo.Alias == "" {
   872  			return nil, fmt.Errorf("one of the repos has an empty alias")
   873  		}
   874  		g.nodes[repo.Alias] = &repoNode{repo: repo}
   875  	}
   876  	for _, repo := range repos {
   877  		for _, link := range repo.CommitInflow {
   878  			if g.nodes[link.Alias] == nil {
   879  				return nil, fmt.Errorf("no repo with alias %q", link.Alias)
   880  			}
   881  			g.nodes[repo.Alias].addEdge(true, link, g.nodes[link.Alias])
   882  			g.nodes[link.Alias].addEdge(false, link, g.nodes[repo.Alias])
   883  		}
   884  	}
   885  	for alias, node := range g.nodes {
   886  		reachable := node.reachable(true)
   887  		if _, ok := reachable[node]; ok {
   888  			return nil, fmt.Errorf("%q lies on a cycle", alias)
   889  		}
   890  	}
   891  	return g, nil
   892  }
   893  
   894  func (g *repoGraph) nodeByRepo(url, branch string) *repoNode {
   895  	for _, node := range g.nodes {
   896  		if node.repo.URL == url && node.repo.Branch == branch {
   897  			return node
   898  		}
   899  	}
   900  	return nil
   901  }
   902  
   903  func (g *repoGraph) nodeByAlias(alias string) *repoNode {
   904  	for _, node := range g.nodes {
   905  		if node.repo.Alias == alias {
   906  			return node
   907  		}
   908  	}
   909  	return nil
   910  }
   911  
   912  func (g *repoGraph) forEachEdge(cb func(from, to *repoNode, info KernelRepoLink) error) error {
   913  	for _, node := range g.nodes {
   914  		for _, e := range node.edges {
   915  			if !e.in {
   916  				continue
   917  			}
   918  			err := cb(e.other, node, e.info)
   919  			if err != nil {
   920  				return err
   921  			}
   922  		}
   923  	}
   924  	return nil
   925  }
   926  
// reachable returns a map *repoNode -> bool (whether commits are merged).
// in=true collects the trees from which commits flow to n,
// in=false -- the trees to which commits flow from n.
func (n *repoNode) reachable(in bool) map[*repoNode]bool {
	ret := map[*repoNode]bool{}
	// First collect nodes only reachable via merge=true links.
	// Since already-visited nodes are skipped, the second pass only adds the
	// nodes that cannot be reached through merge links alone.
	n.reachableMerged(in, true, ret)
	n.reachableMerged(in, false, ret)
	return ret
}
   935  
// reachableMerged runs a DFS over the edges of the given direction and records
// the reached nodes in ret. When onlyMerge is set, only merge links are
// followed. The recorded value is true iff the whole path to the node
// consisted of merge links. Nodes already present in ret are not revisited.
func (n *repoNode) reachableMerged(in, onlyMerge bool, ret map[*repoNode]bool) {
	var dfs func(*repoNode, bool)
	dfs = func(node *repoNode, merge bool) {
		for _, edge := range node.edges {
			if edge.in != in || onlyMerge && !edge.info.Merge {
				continue
			}
			if _, ok := ret[edge.other]; ok {
				continue
			}
			ret[edge.other] = merge && edge.info.Merge
			dfs(edge.other, merge && edge.info.Merge)
		}
	}
	dfs(n, true)
}
   952  
   953  func (n *repoNode) allReachable() map[*repoNode]bool {
   954  	ret := n.reachable(true)
   955  	for node, merge := range n.reachable(false) {
   956  		ret[node] = merge
   957  	}
   958  	return ret
   959  }
   960  
   961  func (n *repoNode) addEdge(in bool, info KernelRepoLink, other *repoNode) {
   962  	n.edges = append(n.edges, repoEdge{
   963  		in:    in,
   964  		info:  info,
   965  		other: other,
   966  	})
   967  }