github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/dashboard/app/tree.go (about)

     1  // Copyright 2023 syzkaller project authors. All rights reserved.
     2  // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
     3  
     4  package main
     5  
     6  // Given information on how commits flow from one kernel source tree to another, assign
     7  // bugs labels of two kinds:
     8  // a) LabelIntroduced -- reproducer does not work in any other kernel tree, FROM which commits flow.
     9  // b) LabelReached -- reproducer does not work in any other kernel tree, TO which commits flow.
    10  
    11  import (
    12  	"context"
    13  	"fmt"
    14  	"sort"
    15  	"sync"
    16  	"time"
    17  
    18  	"github.com/google/syzkaller/dashboard/dashapi"
    19  	"golang.org/x/sync/errgroup"
    20  	db "google.golang.org/appengine/v2/datastore"
    21  	"google.golang.org/appengine/v2/log"
    22  )
    23  
    24  // generateTreeOriginJobs generates new jobs for bug origin tree determination.
    25  func generateTreeOriginJobs(cGlobal context.Context, bugKey *db.Key,
    26  	managers map[string]dashapi.ManagerJobs) (*Job, *db.Key, error) {
    27  	var job *Job
    28  	var jobKey *db.Key
    29  	tx := func(c context.Context) error {
    30  		bug := new(Bug)
    31  		if err := db.Get(c, bugKey, bug); err != nil {
    32  			return fmt.Errorf("failed to get bug: %w", err)
    33  		}
    34  		ctx := &bugTreeContext{
    35  			c:       c,
    36  			cGlobal: cGlobal,
    37  			bug:     bug,
    38  			bugKey:  bug.key(c),
    39  		}
    40  		ret := ctx.pollBugTreeJobs(managers)
    41  		switch ret.(type) {
    42  		case pollResultError:
    43  			return ret.(error)
    44  		case pollResultWait:
    45  			newTime, ok := ret.(time.Time)
    46  			if ok && newTime.After(bug.TreeTests.NextPoll) {
    47  				bug.TreeTests.NextPoll = newTime
    48  			}
    49  		}
    50  		bug.TreeTests.NeedPoll = false
    51  		if _, err := db.Put(c, bugKey, bug); err != nil {
    52  			return fmt.Errorf("failed to put bug: %w", err)
    53  		}
    54  		job, jobKey = ctx.job, ctx.jobKey
    55  		return nil
    56  	}
    57  	if err := runInTransaction(cGlobal, tx, &db.TransactionOptions{XG: true}); err != nil {
    58  		return nil, nil, err
    59  	}
    60  	return job, jobKey, nil
    61  }
    62  
    63  // treeOriginJobDone is supposed to be called when tree origin job is done.
    64  // It keeps the cached info in Bug up to date and assigns bug tree origin labels.
    65  func treeOriginJobDone(cGlobal context.Context, jobKey *db.Key, job *Job) error {
    66  	bugKey := jobKey.Parent()
    67  	tx := func(c context.Context) error {
    68  		bug := new(Bug)
    69  		if err := db.Get(c, bugKey, bug); err != nil {
    70  			return fmt.Errorf("failed to get bug: %w", err)
    71  		}
    72  		ctx := &bugTreeContext{
    73  			c:         c,
    74  			cGlobal:   cGlobal,
    75  			bug:       bug,
    76  			bugKey:    bug.key(c),
    77  			noNewJobs: true,
    78  		}
    79  		ret := ctx.pollBugTreeJobs(
    80  			map[string]dashapi.ManagerJobs{job.Manager: {TestPatches: true}},
    81  		)
    82  		switch ret.(type) {
    83  		case pollResultError:
    84  			return ret.(error)
    85  		case pollResultPending:
    86  			bug.TreeTests.NextPoll = time.Time{}
    87  			bug.TreeTests.NeedPoll = true
    88  		}
    89  		if _, err := db.Put(c, bugKey, bug); err != nil {
    90  			return fmt.Errorf("failed to put bug: %w", err)
    91  		}
    92  		return nil
    93  	}
    94  	return runInTransaction(cGlobal, tx, &db.TransactionOptions{XG: true})
    95  }
    96  
// pollTreeJobResult is a marker interface for the possible outcomes of
// the tree-test polling steps below.
type pollTreeJobResult interface{}

// pollResultPending is returned when we wait some job to finish.
type pollResultPending struct{}

// pollResultWait is returned when we know the next time the process could be repeated.
type pollResultWait time.Time

// pollResultSkip means that there are no poll jobs we could run at the moment.
// It's impossible to say when it changes, so it's better not to repeat polling soon.
type pollResultSkip struct{}

// pollResultError wraps an error that aborted the polling process.
type pollResultError error

// pollResultDone is returned when a matching finished job was found.
type pollResultDone struct {
	// Crashed is true if the job recorded a crash title (see findResult).
	Crashed  bool
	// Finished is the completion time of the job.
	Finished time.Time
}
   115  
// bugTreeContext carries the state shared by all tree-origin polling steps
// for a single bug.
type bugTreeContext struct {
	c context.Context
	// Datastore puts limits on how often a single entity can be accessed by transactions.
	// And we actually don't always need a consistent view of the DB, we just want to query
	// a single entity. So, when possible, let's make queries outside of a transaction.
	cGlobal   context.Context
	// crash/crashKey/build describe the crash all tree tests are based on
	// (selected by loadCrashInfo).
	crash     *Crash
	crashKey  *db.Key
	bugKey    *db.Key
	bug       *Bug
	build     *Build
	// repoNode is the commit-flow graph node of the tree the crash happened on.
	repoNode  *repoNode
	// noNewJobs prevents doRunRepro from scheduling new jobs (it reports
	// pollResultPending instead).
	noNewJobs bool

	// If any jobs were created, here'll be one of them.
	job    *Job
	jobKey *db.Key
}
   134  
   135  func (ctx *bugTreeContext) pollBugTreeJobs(managers map[string]dashapi.ManagerJobs) pollTreeJobResult {
   136  	// Determine the crash we'd stick to.
   137  	err := ctx.loadCrashInfo()
   138  	if err != nil {
   139  		log.Errorf(ctx.c, "bug %q: failed to load crash info: %s", ctx.bug.displayTitle(), err)
   140  		return pollResultError(err)
   141  	}
   142  	if ctx.crash == nil {
   143  		// There are no crashes we could further work with.
   144  		// TODO: consider looking at the recent repro retest results.
   145  		log.Infof(ctx.c, "bug %q: no suitable crash", ctx.bug.displayTitle())
   146  		return pollResultSkip{}
   147  	}
   148  	if ctx.repoNode == nil {
   149  		// We have no information about the tree on which the bug happened.
   150  		log.Errorf(ctx.c, "bug %q: no information about the tree", ctx.bug.displayTitle())
   151  		return pollResultSkip{}
   152  	}
   153  	if !managers[ctx.crash.Manager].TestPatches {
   154  		return pollResultSkip{}
   155  	}
   156  	if len(ctx.bug.TreeTests.List) > 0 && ctx.crashKey.IntID() != ctx.bug.TreeTests.List[0].CrashID {
   157  		// Clean up old job records, they are no longer relevant.
   158  		ctx.bug.TreeTests.List = nil
   159  	}
   160  	for i := range ctx.bug.TreeTests.List {
   161  		err := ctx.bug.TreeTests.List[i].applyPending(ctx.c)
   162  		if err != nil {
   163  			return pollResultError(err)
   164  		}
   165  	}
   166  	return ctx.groupResults([]pollTreeJobResult{
   167  		ctx.setOriginLabels(),
   168  		ctx.missingBackports(),
   169  	})
   170  }
   171  
// setOriginLabels assigns LabelIntroduced/LabelReached origin labels depending
// on which reachable trees the reproducer crashes. It returns pollResultSkip
// when labeling is complete or impossible, otherwise the pending/wait state of
// the underlying repro-run jobs.
func (ctx *bugTreeContext) setOriginLabels() pollTreeJobResult {
	if !ctx.labelsCanBeSet() || ctx.bug.HasUserLabel(OriginLabel) {
		// Either no tree carries origin labels, or the user set one manually.
		return pollResultSkip{}
	}
	// Drop previously auto-assigned origin labels; they are re-derived below.
	ctx.bug.UnsetLabels(OriginLabel)

	var results []pollTreeJobResult
	perNode := map[*repoNode]pollTreeJobResult{}
	for node, merge := range ctx.repoNode.allReachable() {
		var result pollTreeJobResult
		if merge {
			// Merge base gives a much better result quality, so use it whenever possible.
			result = ctx.runRepro(node.repo, wantFirstAny{}, runOnMergeBase{
				Repo:   ctx.build.KernelRepo,
				Branch: ctx.build.KernelBranch,
			})
		} else {
			result = ctx.runRepro(node.repo, wantFirstAny{}, runOnHEAD{})
		}
		perNode[node] = result
		results = append(results, result)
	}
	result := ctx.groupResults(results)
	if _, ok := result.(pollResultPending); ok {
		// At least wait until all started jobs have finished (successfully or not).
		return result
	}
	lastDone := ctx.lastDone(results)
	if lastDone.IsZero() {
		// Demand that at least one of the finished jobs has finished successfully.
		return pollResultSkip{}
	}
	// Since we have a repro for it, it definitely crashed at some point.
	perNode[ctx.repoNode] = pollResultDone{Crashed: true}
	allLabels := append(ctx.selectRepoLabels(true, perNode), ctx.selectRepoLabels(false, perNode)...)
	for _, label := range allLabels {
		if label == ctx.repoNode.repo.LabelIntroduced || label == ctx.repoNode.repo.LabelReached {
			// It looks like our reproducer does not work on other trees.
			// Just in case verify that it still works on the original one.
			result := ctx.runRepro(ctx.repoNode.repo, wantNewAny(lastDone), runOnHEAD{})
			resultDone, ok := result.(pollResultDone)
			if !ok {
				return result
			}
			if !resultDone.Crashed {
				// Unfortunately the repro no longer works. Don't assign labels.
				return pollResultSkip{}
			}
		}
	}
	var labels []BugLabel
	for _, label := range allLabels {
		labels = append(labels, BugLabel{Label: OriginLabel, Value: label})
	}
	ctx.bug.SetLabels(makeLabelSet(ctx.c, ctx.bug.Namespace), labels)
	return pollResultSkip{}
}
   229  
   230  // selectRepoLabels attributes bugs to trees depending on the patch testing results.
   231  func (ctx *bugTreeContext) selectRepoLabels(in bool, results map[*repoNode]pollTreeJobResult) []string {
   232  	crashed := map[*repoNode]bool{}
   233  	for node, result := range results {
   234  		done, ok := result.(pollResultDone)
   235  		if ok {
   236  			crashed[node] = done.Crashed
   237  		}
   238  	}
   239  	for node := range crashed {
   240  		if !crashed[node] {
   241  			continue
   242  		}
   243  		// (1) The in = true case:
   244  		// If, for a tree X, there's a tree Y from which commits flow to X and the reproducer crashed
   245  		// on Y, X cannot be among bug origin trees.
   246  		// (1) The in = false case:
   247  		// If, for a tree X, there's a tree Y to which commits flow to X and the reproducer crashed
   248  		// on Y, X cannot be the last tree to which the bug has spread.
   249  		for otherNode := range node.reachable(!in) {
   250  			crashed[otherNode] = false
   251  		}
   252  	}
   253  	ret := []string{}
   254  	for node, set := range crashed {
   255  		if !set {
   256  			continue
   257  		}
   258  		if in && node.repo.LabelIntroduced != "" {
   259  			ret = append(ret, node.repo.LabelIntroduced)
   260  		} else if !in && node.repo.LabelReached != "" {
   261  			ret = append(ret, node.repo.LabelReached)
   262  		}
   263  	}
   264  	return ret
   265  }
   266  
   267  // Test if there's any sense in testing other trees.
   268  // For example, if we hit a bug on a mainline, there's no sense to test linux-next to check
   269  // if it's a linux-next bug.
   270  func (ctx *bugTreeContext) labelsCanBeSet() bool {
   271  	for node := range ctx.repoNode.reachable(true) {
   272  		if node.repo.LabelIntroduced != "" {
   273  			return true
   274  		}
   275  	}
   276  	for node := range ctx.repoNode.reachable(false) {
   277  		if node.repo.LabelReached != "" {
   278  			return true
   279  		}
   280  	}
   281  	return ctx.repoNode.repo.LabelIntroduced != "" ||
   282  		ctx.repoNode.repo.LabelReached != ""
   283  }
   284  
// missingBackports detects bugs that look like they're caused by a fix missing
// from this tree: some tree from which commits flow here no longer crashes on
// the reproducer, while it (or its merge base) crashed earlier. On detection it
// re-verifies the repro on HEAD and sets/unsets MissingBackportLabel.
func (ctx *bugTreeContext) missingBackports() pollTreeJobResult {
	if !ctx.repoNode.repo.DetectMissingBackports || ctx.bug.HasUserLabel(MissingBackportLabel) {
		return pollResultSkip{}
	}
	var okDate time.Time
	results := []pollTreeJobResult{}
	for node, merge := range ctx.repoNode.reachable(true) {
		// First find a run where the repro did NOT crash the other tree.
		resultOK := ctx.runRepro(node.repo, wantFirstOK{}, runOnHEAD{})
		doneOK, ok := resultOK.(pollResultDone)
		if !ok {
			results = append(results, resultOK)
			continue
		}
		var resultCrash pollTreeJobResult
		if merge {
			resultCrash = ctx.runRepro(node.repo, wantFirstAny{}, runOnMergeBase{
				Repo:   ctx.build.KernelRepo,
				Branch: ctx.build.KernelBranch,
			})
		} else {
			// We already know that the reproducer doesn't crash the tree.
			// There'd be no sense to call runRepro in the hope of getting a crash,
			// so let's just look into the past tree testing results.
			resultCrash, _ = ctx.bug.findResult(ctx.c, node.repo, wantFirstCrash{}, runOnAny{})
		}
		doneCrash, ok := resultCrash.(pollResultDone)
		if !ok {
			results = append(results, resultCrash)
			continue
		} else if merge && doneCrash.Crashed || doneOK.Finished.After(doneCrash.Finished) {
			// That's what we want: earlier it crashed and then stopped.
			// Note the precedence: (merge && doneCrash.Crashed) || doneOK.Finished.After(...).
			okDate = doneOK.Finished
			break
		}
	}
	if okDate.IsZero() {
		return ctx.groupResults(results)
	}
	// We are about to assign the "missing backport" label.
	// To reduce the number of backports, just in case run once more on HEAD.
	// The bug fix could have already reached the repository.
	result := ctx.runRepro(ctx.repoNode.repo, wantNewAny(okDate), runOnHEAD{})
	resultDone, ok := result.(pollResultDone)
	if !ok {
		return result
	}
	ctx.bug.UnsetLabels(MissingBackportLabel)
	if resultDone.Crashed {
		// The repro still crashes our tree even though it no longer crashes the upstream one.
		ctx.bug.SetLabels(makeLabelSet(ctx.c, ctx.bug.Namespace), []BugLabel{
			{Label: MissingBackportLabel},
		})
	}
	return pollResultSkip{}
}
   339  
   340  func (ctx *bugTreeContext) lastDone(results []pollTreeJobResult) time.Time {
   341  	var maxTime time.Time
   342  	for _, item := range results {
   343  		done, ok := item.(pollResultDone)
   344  		if !ok {
   345  			continue
   346  		}
   347  		if done.Finished.After(maxTime) {
   348  			maxTime = done.Finished
   349  		}
   350  	}
   351  	return maxTime
   352  }
   353  
   354  func (ctx *bugTreeContext) groupResults(results []pollTreeJobResult) pollTreeJobResult {
   355  	var minWait time.Time
   356  	for _, result := range results {
   357  		switch v := result.(type) {
   358  		case pollResultPending, pollResultError:
   359  			// Wait for the job result to continue.
   360  			return result
   361  		case pollResultWait:
   362  			t := time.Time(v)
   363  			if minWait.IsZero() || minWait.After(t) {
   364  				minWait = t
   365  			}
   366  		}
   367  	}
   368  	if !minWait.IsZero() {
   369  		return pollResultWait(minWait)
   370  	}
   371  	return pollResultSkip{}
   372  }
   373  
// expectedResult is a marker interface for the selectors of past repro-run
// results (see findResult).
type expectedResult interface{}

// resultFreshness subtypes.
type wantFirstOK struct{}    // the first run where the kernel did not crash
type wantFirstCrash struct{} // the first run where the kernel crashed
type wantFirstAny struct{}   // the first finished run, whatever the outcome
type wantNewAny time.Time    // any run finished at or after the given time

// runReproOn describes the kernel revision to run the reproducer on.
type runReproOn interface{}

// runReproOn subtypes.
type runOnAny struct{} // attempts to find any result, if unsuccessful, runs on HEAD
type runOnHEAD struct{}
// runOnMergeBase requests a run on the merge base with the given repo/branch.
type runOnMergeBase struct {
	Repo   string
	Branch string
}
   391  
   392  func (ctx *bugTreeContext) runRepro(repo KernelRepo, result expectedResult, runOn runReproOn) pollTreeJobResult {
   393  	ret := ctx.doRunRepro(repo, result, runOn)
   394  	log.Infof(ctx.c, "runRepro on %s, %T, %T: %#v", repo.Alias, result, runOn, ret)
   395  	return ret
   396  }
   397  
// doRunRepro returns an already known repro-run result that matches the query,
// or schedules a new patch testing job (unless ctx.noNewJobs is set, in which
// case it just reports pollResultPending). Retries after errors and repetitions
// of successful runs are rate limited.
func (ctx *bugTreeContext) doRunRepro(repo KernelRepo, result expectedResult, runOn runReproOn) pollTreeJobResult {
	existingResult, _ := ctx.bug.findResult(ctx.c, repo, result, runOn)
	if _, ok := existingResult.(pollResultSkip); !ok {
		return existingResult
	}
	// Okay, nothing suitable was found. We need to set up a new job.
	if ctx.noNewJobs {
		return pollResultPending{}
	}
	// First check if there's existing BugTreeTest object.
	if _, ok := runOn.(runOnAny); ok {
		// runOnAny falls back to a HEAD run when no past result exists.
		runOn = runOnHEAD{}
	}
	candidates := ctx.bug.matchingTreeTests(repo, runOn)
	var bugTreeTest *BugTreeTest
	if len(candidates) > 0 {
		bugTreeTest = &ctx.bug.TreeTests.List[candidates[0]]
	} else {
		item := BugTreeTest{
			CrashID: ctx.crashKey.IntID(),
			Repo:    repo.URL,
			Branch:  repo.Branch,
		}
		if v, ok := runOn.(runOnMergeBase); ok {
			item.MergeBaseRepo = v.Repo
			item.MergeBaseBranch = v.Branch
		}
		ctx.bug.TreeTests.List = append(ctx.bug.TreeTests.List, item)
		bugTreeTest = &ctx.bug.TreeTests.List[len(ctx.bug.TreeTests.List)-1]
	}

	// Don't retry a previously failed run too soon.
	if bugTreeTest.Error != "" {
		const errorRetryTime = 24 * time.Hour * 14
		result := ctx.ensureRepeatPeriod(bugTreeTest.Error, errorRetryTime)
		if _, ok := result.(pollResultSkip); !ok {
			return result
		}
		bugTreeTest.Error = ""
	}
	// Don't repeat a successful run too soon either.
	if bugTreeTest.Last != "" {
		const fixRetryTime = 24 * time.Hour * 45
		result := ctx.ensureRepeatPeriod(bugTreeTest.Last, fixRetryTime)
		if _, ok := result.(pollResultSkip); !ok {
			return result
		}
	}
	var err error
	ctx.job, ctx.jobKey, err = addTestJob(ctx.c, &testJobArgs{
		crash:         ctx.crash,
		crashKey:      ctx.crashKey,
		configRef:     ctx.build.KernelConfig,
		configAppend:  repo.AppendConfig,
		inTransaction: true,
		treeOrigin:    true,
		testReqArgs: testReqArgs{
			bug:             ctx.bug,
			bugKey:          ctx.bugKey,
			repo:            bugTreeTest.Repo,
			branch:          bugTreeTest.Branch,
			mergeBaseRepo:   bugTreeTest.MergeBaseRepo,
			mergeBaseBranch: bugTreeTest.MergeBaseBranch,
		},
	})
	if err != nil {
		return pollResultError(err)
	}
	// Remember the pending job; applyPending() will pick up its result later.
	bugTreeTest.Pending = ctx.jobKey.Encode()
	return pollResultPending{}
}
   467  
   468  func (ctx *bugTreeContext) ensureRepeatPeriod(jobKey string, period time.Duration) pollTreeJobResult {
   469  	job, _, err := fetchJob(ctx.c, jobKey)
   470  	if err != nil {
   471  		return pollResultError(err)
   472  	}
   473  	timePassed := timeNow(ctx.c).Sub(job.Finished)
   474  	if timePassed < period {
   475  		return pollResultWait(job.Finished.Add(period))
   476  	}
   477  	return pollResultSkip{}
   478  }
   479  
   480  func (bug *Bug) findResult(c context.Context,
   481  	repo KernelRepo, result expectedResult, runOn runReproOn) (pollTreeJobResult, *Job) {
   482  	anyPending := false
   483  	for _, i := range bug.matchingTreeTests(repo, runOn) {
   484  		info := &bug.TreeTests.List[i]
   485  		anyPending = anyPending || info.Pending != ""
   486  		key := ""
   487  		switch result.(type) {
   488  		case wantFirstOK:
   489  			key = info.FirstOK
   490  		case wantFirstCrash:
   491  			key = info.FirstCrash
   492  		case wantFirstAny:
   493  			key = info.First
   494  		case wantNewAny:
   495  			key = info.Last
   496  		default:
   497  			return pollResultError(fmt.Errorf("unexpected expected result: %T", result)), nil
   498  		}
   499  		if key == "" {
   500  			continue
   501  		}
   502  		job, _, err := fetchJob(c, key)
   503  		if err != nil {
   504  			return pollResultError(err), nil
   505  		}
   506  		if date, ok := result.(wantNewAny); ok {
   507  			if job.Finished.Before(time.Time(date)) {
   508  				continue
   509  			}
   510  		}
   511  		return pollResultDone{
   512  			Crashed:  job.CrashTitle != "",
   513  			Finished: job.Finished,
   514  		}, job
   515  	}
   516  	if anyPending {
   517  		return pollResultPending{}, nil
   518  	} else {
   519  		return pollResultSkip{}, nil
   520  	}
   521  }
   522  
   523  func (bug *Bug) matchingTreeTests(repo KernelRepo, runOn runReproOn) []int {
   524  	ret := []int{}
   525  	for i, item := range bug.TreeTests.List {
   526  		if item.Repo != repo.URL {
   527  			continue
   528  		}
   529  		ok := true
   530  		switch v := runOn.(type) {
   531  		case runOnHEAD:
   532  			// TODO: should we check for an empty merge base here?
   533  			ok = item.Branch == repo.Branch
   534  		case runOnMergeBase:
   535  			ok = item.Branch == repo.Branch &&
   536  				item.MergeBaseRepo == v.Repo &&
   537  				item.MergeBaseBranch == v.Branch
   538  		}
   539  		if ok {
   540  			ret = append(ret, i)
   541  		}
   542  	}
   543  	return ret
   544  }
   545  
// loadCrashInfo picks the crash (plus its build and repo graph node) that the
// tree tests will be based on. It prefers the crash referenced by previous
// tree tests and upgrades to the most relevant reproducer crash when that one
// has a longer report. ctx.crash stays nil if nothing relevant was found.
func (ctx *bugTreeContext) loadCrashInfo() error {
	// First look at the crash from previous tests.
	if len(ctx.bug.TreeTests.List) > 0 {
		crashID := ctx.bug.TreeTests.List[len(ctx.bug.TreeTests.List)-1].CrashID
		crashKey := db.NewKey(ctx.c, "Crash", "", crashID, ctx.bugKey)
		crash := new(Crash)
		// We need to also tolerate the case when the crash was just deleted.
		err := db.Get(ctx.cGlobal, crashKey, crash)
		if err != nil && err != db.ErrNoSuchEntity {
			return fmt.Errorf("failed to get crash: %w", err)
		} else if err == nil {
			ok, build, err := ctx.isCrashRelevant(crash)
			if err != nil {
				return err
			}
			if ok {
				ctx.build = build
				ctx.crash = crash
				ctx.crashKey = crashKey
			}
		}
	}

	// Query the most relevant crash with repro.
	crash, crashKey, err := findCrashForBug(ctx.cGlobal, ctx.bug)
	if err != nil {
		return err
	}
	ok, build, err := ctx.isCrashRelevant(crash)
	if err != nil {
		return err
	} else if ok && (ctx.crash == nil || crash.ReportLen > ctx.crash.ReportLen) {
		// Update the crash only if we found a better one.
		ctx.build = build
		ctx.crash = crash
		ctx.crashKey = crashKey
	}
	// Load the rest of the data.
	if ctx.crash != nil {
		var err error
		ns := ctx.bug.Namespace
		repoGraph, err := makeRepoGraph(getNsConfig(ctx.c, ns).Repos)
		if err != nil {
			return err
		}
		// May legitimately stay nil if the build's repo/branch is not in the graph.
		ctx.repoNode = repoGraph.nodeByRepo(ctx.build.KernelRepo, ctx.build.KernelBranch)
	}
	return nil
}
   595  
   596  func (ctx *bugTreeContext) isCrashRelevant(crash *Crash) (bool, *Build, error) {
   597  	if crash.ReproIsRevoked {
   598  		// No sense in running the reproducer.
   599  		return false, nil, nil
   600  	} else if crash.ReproC == 0 && crash.ReproSyz == 0 {
   601  		// Let's wait for the repro.
   602  		return false, nil, nil
   603  	}
   604  	newManager, _ := activeManager(ctx.cGlobal, crash.Manager, ctx.bug.Namespace)
   605  	if newManager != crash.Manager {
   606  		// The manager was deprecated since the crash.
   607  		// Let's just ignore such bugs for now.
   608  		return false, nil, nil
   609  	}
   610  	build, err := loadBuild(ctx.cGlobal, ctx.bug.Namespace, crash.BuildID)
   611  	if err != nil {
   612  		return false, nil, err
   613  	}
   614  	mgrBuild, err := lastManagerBuild(ctx.cGlobal, build.Namespace, newManager)
   615  	if err != nil {
   616  		return false, build, err
   617  	}
   618  	// It does happen that we sometimes update the tested tree.
   619  	// It's not frequent at all, but it will make all results very confusing.
   620  	return build.KernelRepo == mgrBuild.KernelRepo &&
   621  		build.KernelBranch == mgrBuild.KernelBranch, build, nil
   622  }
   623  
   624  func (test *BugTreeTest) applyPending(c context.Context) error {
   625  	if test.Pending == "" {
   626  		return nil
   627  	}
   628  	job, _, err := fetchJob(c, test.Pending)
   629  	if err != nil {
   630  		return err
   631  	}
   632  	if job.Finished.IsZero() {
   633  		// Not yet ready.
   634  		return nil
   635  	}
   636  	pendingKey := test.Pending
   637  	test.Pending = ""
   638  	if job.Error != 0 {
   639  		test.Error = pendingKey
   640  		return nil
   641  	}
   642  	test.Last = pendingKey
   643  	if test.First == "" {
   644  		test.First = pendingKey
   645  	}
   646  	if test.FirstOK == "" && job.CrashTitle == "" {
   647  		test.FirstOK = pendingKey
   648  	} else if test.FirstCrash == "" && job.CrashTitle != "" {
   649  		test.FirstCrash = pendingKey
   650  	}
   651  	return nil
   652  }
   653  
   654  // treeTestJobs fetches relevant tree testing results.
   655  func treeTestJobs(c context.Context, bug *Bug) ([]*dashapi.JobInfo, error) {
   656  	g, _ := errgroup.WithContext(context.Background())
   657  	jobIDs := make(chan string)
   658  
   659  	var ret []*dashapi.JobInfo
   660  	var mu sync.Mutex
   661  
   662  	// The underlying code makes a number of queries, so let's do it in parallel to speed up processing.
   663  	const threads = 3
   664  	for i := 0; i < threads; i++ {
   665  		g.Go(func() error {
   666  			for id := range jobIDs {
   667  				job, jobKey, err := fetchJob(c, id)
   668  				if err != nil {
   669  					return err
   670  				}
   671  				build, err := loadBuild(c, job.Namespace, job.BuildID)
   672  				if err != nil {
   673  					return err
   674  				}
   675  				crashKey := db.NewKey(c, "Crash", "", job.CrashID, bug.key(c))
   676  				crash := new(Crash)
   677  				if err := db.Get(c, crashKey, crash); err != nil {
   678  					return fmt.Errorf("failed to get crash: %w", err)
   679  				}
   680  				info := makeJobInfo(c, job, jobKey, bug, build, crash)
   681  				mu.Lock()
   682  				ret = append(ret, info)
   683  				mu.Unlock()
   684  			}
   685  			return nil
   686  		})
   687  	}
   688  	for _, info := range bug.TreeTests.List {
   689  		if info.FirstOK != "" {
   690  			jobIDs <- info.FirstOK
   691  		}
   692  		if info.FirstCrash != "" {
   693  			jobIDs <- info.FirstCrash
   694  		}
   695  		if info.Error != "" {
   696  			jobIDs <- info.Error
   697  		}
   698  	}
   699  	// Wait until we have all information.
   700  	close(jobIDs)
   701  	err := g.Wait()
   702  	if err != nil {
   703  		return nil, err
   704  	}
   705  	// Sort structures to keep output consistent.
   706  	sort.Slice(ret, func(i, j int) bool {
   707  		if ret[i].KernelAlias != ret[j].KernelAlias {
   708  			return ret[i].KernelAlias < ret[j].KernelAlias
   709  		}
   710  		return ret[i].Finished.Before(ret[j].Finished)
   711  	})
   712  	return ret, nil
   713  }
   714  
// Create a cross-tree bisection job (if needed).
// Returns:
// a) Job object and its key -- in case of success.
// b) Whether the lookup was expensive (it can help optimize crossTreeBisection calls).
// A job is created for the first BisectFixes edge where the repro still crashes
// the destination tree but no longer crashes the source tree.
func crossTreeBisection(c context.Context, bug *Bug,
	managers map[string]dashapi.ManagerJobs) (*Job, *db.Key, bool, error) {
	repoGraph, err := makeRepoGraph(getNsConfig(c, bug.Namespace).Repos)
	if err != nil {
		return nil, nil, false, err
	}
	// Lazily query this bug's previous fix bisection jobs.
	bugJobs := &lazyJobList{
		c:       c,
		bug:     bug,
		jobType: JobBisectFix,
	}
	var job *Job
	var jobKey *db.Key
	expensive := false
	err = repoGraph.forEachEdge(func(from, to *repoNode, info KernelRepoLink) error {
		if jobKey != nil {
			// A job was already created on a previous edge; skip the rest.
			return nil
		}
		if !info.BisectFixes {
			return nil
		}
		expensive = true
		log.Infof(c, "%s: considering cross-tree bisection %s/%s",
			bug.displayTitle(), from.repo.Alias, to.repo.Alias)
		_, crashJob := bug.findResult(c, to.repo, wantNewAny{}, runOnHEAD{})
		if crashJob == nil {
			// No patch testing was performed yet.
			return nil
		}
		if crashJob.CrashTitle == "" {
			// The bug is already fixed on the target tree.
			return nil
		}
		crashBuild, err := loadBuild(c, bug.Namespace, crashJob.BuildID)
		if err != nil {
			return err
		}
		manager, _ := activeManager(c, crashJob.Manager, crashJob.Namespace)
		if !managers[manager].BisectFix {
			return nil
		}
		_, successJob := bug.findResult(c, from.repo, wantNewAny{}, runOnHEAD{})
		if successJob == nil {
			// The jobs is not done yet.
			return nil
		}
		if successJob.CrashTitle != "" {
			// The kernel tree is still crashed by the repro.
			return nil
		}
		// Bisect from the crashing commit on the destination tree, over the
		// source tree, with the destination tree as the merge base.
		newJob := &Job{
			Type:            JobBisectFix,
			Created:         timeNow(c),
			Namespace:       bug.Namespace,
			Manager:         crashJob.Manager,
			BisectFrom:      crashBuild.KernelCommit,
			KernelRepo:      from.repo.URL,
			KernelBranch:    from.repo.Branch,
			MergeBaseRepo:   to.repo.URL,
			MergeBaseBranch: to.repo.Branch,
			BugTitle:        bug.displayTitle(),
			CrashID:         crashJob.CrashID,
		}
		// It's expected that crossTreeBisection is not concurrently called with the same
		// manager list.
		prevJob, err := bugJobs.lastMatch(newJob)
		if err != nil {
			return err
		}
		const repeatPeriod = time.Hour * 24 * 30
		if prevJob != nil && (prevJob.Error == 0 ||
			prevJob.Finished.After(timeNow(c).Add(-repeatPeriod))) {
			// The job is already pending or failed recently. Skip.
			return nil
		}
		job = newJob
		jobKey, err = saveJob(c, newJob, bug.key(c))
		return err
	})
	return job, jobKey, expensive, err
}
   800  
// lazyJobList queries the bug's jobs of the given type on first use and caches
// them for subsequent lastMatch calls.
type lazyJobList struct {
	c       context.Context
	bug     *Bug
	jobType JobType
	jobs    *bugJobs // lazily filled by lastMatch
}
   807  
   808  func (list *lazyJobList) lastMatch(job *Job) (*Job, error) {
   809  	if list.jobs == nil {
   810  		var err error
   811  		list.jobs, err = queryBugJobs(list.c, list.bug, list.jobType)
   812  		if err != nil {
   813  			return nil, err
   814  		}
   815  	}
   816  	var best *Job
   817  	for _, item := range list.jobs.all() {
   818  		otherJob := item.job
   819  		same := otherJob.Manager == job.Manager &&
   820  			otherJob.KernelRepo == job.KernelRepo &&
   821  			otherJob.KernelBranch == job.KernelBranch &&
   822  			otherJob.CrashID == job.CrashID &&
   823  			otherJob.MergeBaseRepo == job.MergeBaseRepo &&
   824  			otherJob.MergeBaseBranch == job.MergeBaseBranch
   825  		if !same {
   826  			continue
   827  		}
   828  		if best == nil || best.Created.Before(otherJob.Created) {
   829  			best = otherJob
   830  		}
   831  	}
   832  	return best, nil
   833  }
   834  
   835  func doneCrossTreeBisection(c context.Context, jobKey *db.Key, job *Job) error {
   836  	if job.Type != JobBisectFix || job.MergeBaseRepo == "" {
   837  		// Not a cross tree bisection.
   838  		return nil
   839  	}
   840  	if job.Error != 0 || job.isUnreliableBisect() || len(job.Commits) != 1 {
   841  		// The result is not interesting.
   842  		return nil
   843  	}
   844  	return updateSingleBug(c, jobKey.Parent(), func(bug *Bug) error {
   845  		bug.FixCandidateJob = jobKey.Encode()
   846  		return nil
   847  	})
   848  }
   849  
// repoNode is a vertex of the kernel repo commit-flow graph.
type repoNode struct {
	repo  KernelRepo
	edges []repoEdge
}

// repoEdge is one direction of a commit-flow link between two repos.
type repoEdge struct {
	// in is true when commits flow from other into this node
	// (see how makeRepoGraph adds edges for CommitInflow links).
	in    bool
	info  KernelRepoLink
	other *repoNode
}

// repoGraph holds all repo nodes of a namespace, keyed by repo alias.
type repoGraph struct {
	nodes map[string]*repoNode
}
   864  
   865  func makeRepoGraph(repos []KernelRepo) (*repoGraph, error) {
   866  	g := &repoGraph{
   867  		nodes: map[string]*repoNode{},
   868  	}
   869  	for _, repo := range repos {
   870  		if repo.Alias == "" {
   871  			return nil, fmt.Errorf("one of the repos has an empty alias")
   872  		}
   873  		g.nodes[repo.Alias] = &repoNode{repo: repo}
   874  	}
   875  	for _, repo := range repos {
   876  		for _, link := range repo.CommitInflow {
   877  			if g.nodes[link.Alias] == nil {
   878  				return nil, fmt.Errorf("no repo with alias %q", link.Alias)
   879  			}
   880  			g.nodes[repo.Alias].addEdge(true, link, g.nodes[link.Alias])
   881  			g.nodes[link.Alias].addEdge(false, link, g.nodes[repo.Alias])
   882  		}
   883  	}
   884  	for alias, node := range g.nodes {
   885  		reachable := node.reachable(true)
   886  		if _, ok := reachable[node]; ok {
   887  			return nil, fmt.Errorf("%q lies on a cycle", alias)
   888  		}
   889  	}
   890  	return g, nil
   891  }
   892  
   893  func (g *repoGraph) nodeByRepo(url, branch string) *repoNode {
   894  	for _, node := range g.nodes {
   895  		if node.repo.URL == url && node.repo.Branch == branch {
   896  			return node
   897  		}
   898  	}
   899  	return nil
   900  }
   901  
   902  func (g *repoGraph) nodeByAlias(alias string) *repoNode {
   903  	for _, node := range g.nodes {
   904  		if node.repo.Alias == alias {
   905  			return node
   906  		}
   907  	}
   908  	return nil
   909  }
   910  
   911  func (g *repoGraph) forEachEdge(cb func(from, to *repoNode, info KernelRepoLink) error) error {
   912  	for _, node := range g.nodes {
   913  		for _, e := range node.edges {
   914  			if !e.in {
   915  				continue
   916  			}
   917  			err := cb(e.other, node, e.info)
   918  			if err != nil {
   919  				return err
   920  			}
   921  		}
   922  	}
   923  	return nil
   924  }
   925  
   926  // reachable returns a map *repoNode -> bool (whether commits are merged).
   927  func (n *repoNode) reachable(in bool) map[*repoNode]bool {
   928  	ret := map[*repoNode]bool{}
   929  	// First collect nodes only reachable via merge=true links.
   930  	n.reachableMerged(in, true, ret)
   931  	n.reachableMerged(in, false, ret)
   932  	return ret
   933  }
   934  
   935  func (n *repoNode) reachableMerged(in, onlyMerge bool, ret map[*repoNode]bool) {
   936  	var dfs func(*repoNode, bool)
   937  	dfs = func(node *repoNode, merge bool) {
   938  		for _, edge := range node.edges {
   939  			if edge.in != in || onlyMerge && !edge.info.Merge {
   940  				continue
   941  			}
   942  			if _, ok := ret[edge.other]; ok {
   943  				continue
   944  			}
   945  			ret[edge.other] = merge && edge.info.Merge
   946  			dfs(edge.other, merge && edge.info.Merge)
   947  		}
   948  	}
   949  	dfs(n, true)
   950  }
   951  
   952  func (n *repoNode) allReachable() map[*repoNode]bool {
   953  	ret := n.reachable(true)
   954  	for node, merge := range n.reachable(false) {
   955  		ret[node] = merge
   956  	}
   957  	return ret
   958  }
   959  
   960  func (n *repoNode) addEdge(in bool, info KernelRepoLink, other *repoNode) {
   961  	n.edges = append(n.edges, repoEdge{
   962  		in:    in,
   963  		info:  info,
   964  		other: other,
   965  	})
   966  }