github.com/google/syzkaller@v0.0.0-20240517125934-c0f1611a36d6/pkg/subsystem/extractor.go (about)

     1  // Copyright 2023 syzkaller project authors. All rights reserved.
     2  // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
     3  
     4  // TODO: on the bug page, add a [debug subsystem assignment] link.
     5  // Only show it for admins.
     6  // The link runs subsystem assignment for the bug and returns the output.
     7  
     8  package subsystem
     9  
import (
	"math"
	"sort"
	"strings"

	"github.com/google/syzkaller/pkg/debugtracer"
)
    16  
// Extractor deduces the subsystems from the list of crashes.
//
// All lookups (by guilty path and by reproducer program) are delegated to raw, which is
// held as an interface so that tests can substitute their own implementation.
type Extractor struct {
	raw rawExtractorInterface
}
    21  
// Crash represents the subset of the available crash information that's required for
// subsystem inference.
//
// GuiltyPath is the path handed to the raw extractor's FromPath; crashes with an empty
// GuiltyPath are skipped during path-based inference. SyzRepro holds the reproducer
// bytes handed to FromProg; a crash with an empty SyzRepro is not counted as having a
// reproducer.
type Crash struct {
	GuiltyPath string
	SyzRepro   []byte
}
    28  
// rawExtractorInterface simplifies testing: Extractor reaches the raw extractor only
// through these two lookups, so a test can plug in a stub.
type rawExtractorInterface interface {
	// FromPath returns the subsystems inferred from a crash's guilty path.
	FromPath(path string) []*Subsystem
	// FromProg returns the subsystems inferred from the bytes of a reproducer program.
	FromProg(progBytes []byte) []*Subsystem
}
    34  
    35  func MakeExtractor(list []*Subsystem) *Extractor {
    36  	return &Extractor{raw: makeRawExtractor(list)}
    37  }
    38  
    39  func (e *Extractor) Extract(crashes []*Crash) []*Subsystem {
    40  	return e.TracedExtract(crashes, &debugtracer.NullTracer{})
    41  }
    42  
    43  func (e *Extractor) TracedExtract(crashes []*Crash, tracer debugtracer.DebugTracer) []*Subsystem {
    44  	// First put all subsystems to the same list.
    45  	subsystems := []*Subsystem{}
    46  	reproCount := 0
    47  	for i, crash := range crashes {
    48  		if crash.GuiltyPath != "" {
    49  			extracted := e.raw.FromPath(crash.GuiltyPath)
    50  			tracer.Log("Crash #%d: guilty=%s subsystems=%s", i+1,
    51  				crash.GuiltyPath, e.readableSubsystems(extracted))
    52  			subsystems = append(subsystems, extracted...)
    53  		}
    54  		if len(crash.SyzRepro) != 0 {
    55  			reproCount++
    56  		}
    57  	}
    58  	subsystems = removeParents(subsystems)
    59  	counts := make(map[*Subsystem]int)
    60  	for _, entry := range subsystems {
    61  		counts[entry]++
    62  	}
    63  
    64  	// If all reproducers hint at the same subsystem, take it as well.
    65  	reproCounts := map[*Subsystem]int{}
    66  	fromRepro := []*Subsystem{}
    67  	for i, crash := range crashes {
    68  		if len(crash.SyzRepro) == 0 {
    69  			continue
    70  		}
    71  		reproSubsystems := e.raw.FromProg(crash.SyzRepro)
    72  		tracer.Log("Crash #%d: repro subsystems=%s", i+1, e.readableSubsystems(reproSubsystems))
    73  		for _, subsystem := range reproSubsystems {
    74  			reproCounts[subsystem]++
    75  			if reproCounts[subsystem] == reproCount {
    76  				tracer.Log("Subsystem %s exists in all reproducers", subsystem.Name)
    77  				fromRepro = append(fromRepro, subsystem)
    78  			}
    79  		}
    80  	}
    81  
    82  	// It can be the case that guilty paths point to several subsystems, but the reproducer
    83  	// can clearly point to one of them.
    84  	// Let's consider it to be the strongest singal.
    85  	if len(fromRepro) > 0 {
    86  		fromRepro = removeParents(fromRepro)
    87  		newSubsystems := []*Subsystem{}
    88  		for _, reproSubsystem := range fromRepro {
    89  			parents := reproSubsystem.ReachableParents()
    90  			parents[reproSubsystem] = struct{}{} // also include the subsystem itself
    91  			for _, subsystem := range subsystems {
    92  				if _, ok := parents[subsystem]; ok {
    93  					tracer.Log("Picking %s because %s is one of its parents",
    94  						reproSubsystem.Name, subsystem.Name)
    95  					newSubsystems = append(newSubsystems, reproSubsystem)
    96  					break
    97  				}
    98  			}
    99  		}
   100  		if len(newSubsystems) > 0 {
   101  			// Just pick those subsystems.
   102  			tracer.Log("Set %s because they appear both in repros and stack tracex",
   103  				e.readableSubsystems(newSubsystems))
   104  			return newSubsystems
   105  		}
   106  
   107  		// If there are sufficiently many reproducers that point to subsystems other than
   108  		// those from guilty paths, there's a chance we just didn't parse report correctly.
   109  		const cutOff = 3
   110  		if reproCount >= cutOff {
   111  			// But if the guilty paths are non-controversial, also take the leading candidate.
   112  			fromStacks := mostVoted(counts, 0.66)
   113  			tracer.Log("There are %d reproducers, so take %s from them and %s from stack traces",
   114  				reproCount, e.readableSubsystems(fromRepro), e.readableSubsystems(fromStacks))
   115  			return append(fromRepro, fromStacks...)
   116  		}
   117  	}
   118  
   119  	// Take subsystems from reproducers into account.
   120  	for _, entry := range fromRepro {
   121  		counts[entry] += reproCount
   122  	}
   123  
   124  	// Let's pick all subsystems that received >= 33% of votes (thus no more than 3).
   125  	afterVoting := mostVoted(counts, 0.33)
   126  	tracer.Log("Take %s from voting results", e.readableSubsystems(afterVoting))
   127  	return removeParents(afterVoting)
   128  }
   129  
   130  func (e *Extractor) readableSubsystems(list []*Subsystem) string {
   131  	var names []string
   132  	for _, item := range list {
   133  		names = append(names, item.Name)
   134  	}
   135  	return strings.Join(names, ", ")
   136  }
   137  
   138  // mostVoted picks subsystems that have received >= share votes.
   139  func mostVoted(counts map[*Subsystem]int, share float64) []*Subsystem {
   140  	total := 0
   141  	for _, count := range counts {
   142  		total += count
   143  	}
   144  	cutOff := int(math.Ceil(share * float64(total)))
   145  	ret := []*Subsystem{}
   146  	for entry, count := range counts {
   147  		if count < cutOff {
   148  			continue
   149  		}
   150  		ret = append(ret, entry)
   151  	}
   152  	return ret
   153  }
   154  
   155  func removeParents(subsystems []*Subsystem) []*Subsystem {
   156  	// If there are both parents and children, remove parents.
   157  	ignore := make(map[*Subsystem]struct{})
   158  	for _, entry := range subsystems {
   159  		for p := range entry.ReachableParents() {
   160  			ignore[p] = struct{}{}
   161  		}
   162  	}
   163  	var ret []*Subsystem
   164  	for _, entry := range subsystems {
   165  		if _, ok := ignore[entry]; ok {
   166  			continue
   167  		}
   168  		ret = append(ret, entry)
   169  	}
   170  	return ret
   171  }