github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/pkg/corpus/corpus.go (about)

     1  // Copyright 2024 syzkaller project authors. All rights reserved.
     2  // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
     3  
     4  package corpus
     5  
     6  import (
     7  	"context"
     8  	"fmt"
     9  	"maps"
    10  	"sync"
    11  
    12  	"github.com/google/syzkaller/pkg/cover"
    13  	"github.com/google/syzkaller/pkg/hash"
    14  	"github.com/google/syzkaller/pkg/signal"
    15  	"github.com/google/syzkaller/pkg/stat"
    16  	"github.com/google/syzkaller/prog"
    17  )
    18  
// Corpus object represents a set of syzkaller-found programs that
// cover the kernel up to the currently reached frontiers.
//
// Programs are added with Save and are keyed by the hash of their
// serialized form. Besides the per-program items, the corpus maintains
// the total signal and coverage of everything saved so far and, when
// focus areas were configured, a separate list of programs per area.
type Corpus struct {
	ctx      context.Context     // once done, Save stops waiting to deliver an update
	mu       sync.RWMutex        // guards progsMap, signal, cover and the program lists
	progsMap map[string]*Item    // items keyed by Item.Sig
	signal   signal.Signal       // total signal of all items
	cover    cover.Cover         // total coverage of all items
	updates  chan<- NewItemEvent // may be nil; otherwise Save sends one event per call

	// ProgramsList is declared elsewhere in the package. Save registers
	// every newly added program in it via saveProgram.
	*ProgramsList
	StatProgs  *stat.Val // "corpus" stat, bound to progsMap via stat.LenOf
	StatSignal *stat.Val // "signal" stat, bound to signal via stat.LenOf
	StatCover  *stat.Val // "coverage" stat, bound to cover via stat.LenOf

	focusAreas []*focusAreaState // one state per FocusArea passed to NewFocusedCorpus
}
    36  
// focusAreaState couples a FocusArea definition with the list of corpus
// programs that applyFocusAreas has assigned to that area.
type focusAreaState struct {
	FocusArea
	*ProgramsList
}
    41  
// FocusArea describes a subset of coverage points that the corpus tracks
// separately. A saved input is assigned to the area when its coverage
// contains at least one PC from CoverPCs.
type FocusArea struct {
	Name     string              // can be empty
	CoverPCs map[uint64]struct{} // set of PCs that make an input belong to the area
	Weight   float64             // not used in this file; presumably the area's relative priority - TODO confirm
}
    47  
    48  func NewCorpus(ctx context.Context) *Corpus {
    49  	return NewMonitoredCorpus(ctx, nil)
    50  }
    51  
    52  func NewMonitoredCorpus(ctx context.Context, updates chan<- NewItemEvent) *Corpus {
    53  	return NewFocusedCorpus(ctx, updates, nil)
    54  }
    55  
    56  func NewFocusedCorpus(ctx context.Context, updates chan<- NewItemEvent, areas []FocusArea) *Corpus {
    57  	corpus := &Corpus{
    58  		ctx:          ctx,
    59  		progsMap:     make(map[string]*Item),
    60  		updates:      updates,
    61  		ProgramsList: &ProgramsList{},
    62  	}
    63  	corpus.StatProgs = stat.New("corpus", "Number of test programs in the corpus", stat.Console,
    64  		stat.Link("/corpus"), stat.Graph("corpus"), stat.LenOf(&corpus.progsMap, &corpus.mu))
    65  	corpus.StatSignal = stat.New("signal", "Fuzzing signal in the corpus",
    66  		stat.LenOf(&corpus.signal, &corpus.mu))
    67  	corpus.StatCover = stat.New("coverage", "Source coverage in the corpus", stat.Console,
    68  		stat.Link("/cover"), stat.Prometheus("syz_corpus_cover"), stat.LenOf(&corpus.cover, &corpus.mu))
    69  	for _, area := range areas {
    70  		obj := &ProgramsList{}
    71  		if len(areas) > 1 && area.Name != "" {
    72  			// Only show extra statistics if there's more than one area.
    73  			stat.New("corpus ["+area.Name+"]",
    74  				fmt.Sprintf("Corpus programs of the focus area %q", area.Name),
    75  				stat.Console, stat.Graph("corpus"),
    76  				stat.LenOf(&obj.progs, &corpus.mu))
    77  		}
    78  		corpus.focusAreas = append(corpus.focusAreas, &focusAreaState{
    79  			FocusArea:    area,
    80  			ProgramsList: obj,
    81  		})
    82  	}
    83  	return corpus
    84  }
    85  
// ItemUpdate records a single Save of a program.
//
// It may happen that a single program is relevant because of several
// syscalls. In that case, there will be several ItemUpdate entities.
type ItemUpdate struct {
	Call     int      // NewInput.Call of the saved input
	RawCover []uint64 // NewInput.RawCover of the saved input
}
    92  
// Item is the corpus entry for one program.
//
// Item objects are to be treated as immutable, otherwise it's just
// too hard to synchronize accesses to them across the whole project.
// When Corpus updates one of its items, it saves a copy of it.
type Item struct {
	Sig     string        // hash of the serialized program; key in Corpus.progsMap
	Call    int           // NewInput.Call of the first input saved for this program
	Prog    *prog.Prog    // the program of the first input saved under Sig
	HasAny  bool          // whether the prog contains squashed arguments
	Signal  signal.Signal // signal merged over all inputs saved for this program
	Cover   []uint64      // coverage merged over all inputs saved for this program
	Updates []ItemUpdate  // history of saves; Save stops appending at 32 entries

	areas map[*focusAreaState]struct{} // focus areas recorded for the item by applyFocusAreas
}
   107  
   108  func (item Item) StringCall() string {
   109  	return item.Prog.CallName(item.Call)
   110  }
   111  
// NewInput is the argument of Corpus.Save: a program together with the
// signal and coverage observed for it.
type NewInput struct {
	Prog     *prog.Prog    // program to store; its serialized form determines the item key
	Call     int           // stored as Item.Call for new programs and in every ItemUpdate
	Signal   signal.Signal // merged into the item's and the corpus' total signal
	Cover    []uint64      // merged into the item's and the corpus' coverage; matched against focus areas
	RawCover []uint64      // kept only in the ItemUpdate history
}
   119  
// NewItemEvent is what Save sends to the corpus' updates channel
// (when one was provided) for every saved input.
type NewItemEvent struct {
	Sig      string   // hash of the serialized program
	Exists   bool     // true if the program was already in the corpus before this Save
	ProgData []byte   // serialized program
	NewCover []uint64 // result of cover.MergeDiff on the corpus total; presumably only the newly added PCs - TODO confirm
}
   126  
   127  func (corpus *Corpus) Save(inp NewInput) {
   128  	progData := inp.Prog.Serialize()
   129  	sig := hash.String(progData)
   130  
   131  	corpus.mu.Lock()
   132  	defer corpus.mu.Unlock()
   133  
   134  	update := ItemUpdate{
   135  		Call:     inp.Call,
   136  		RawCover: inp.RawCover,
   137  	}
   138  	exists := false
   139  	if old, ok := corpus.progsMap[sig]; ok {
   140  		exists = true
   141  		newSignal := old.Signal.Copy()
   142  		newSignal.Merge(inp.Signal)
   143  		var newCover cover.Cover
   144  		newCover.Merge(old.Cover)
   145  		newCover.Merge(inp.Cover)
   146  		newItem := &Item{
   147  			Sig:     sig,
   148  			Prog:    old.Prog,
   149  			Call:    old.Call,
   150  			HasAny:  old.HasAny,
   151  			Signal:  newSignal,
   152  			Cover:   newCover.Serialize(),
   153  			Updates: append([]ItemUpdate{}, old.Updates...),
   154  			areas:   maps.Clone(old.areas),
   155  		}
   156  		const maxUpdates = 32
   157  		if len(newItem.Updates) < maxUpdates {
   158  			newItem.Updates = append(newItem.Updates, update)
   159  		}
   160  		corpus.progsMap[sig] = newItem
   161  		corpus.applyFocusAreas(newItem, inp.Cover)
   162  	} else {
   163  		item := &Item{
   164  			Sig:     sig,
   165  			Call:    inp.Call,
   166  			Prog:    inp.Prog,
   167  			HasAny:  inp.Prog.ContainsAny(),
   168  			Signal:  inp.Signal,
   169  			Cover:   inp.Cover,
   170  			Updates: []ItemUpdate{update},
   171  		}
   172  		corpus.progsMap[sig] = item
   173  		corpus.applyFocusAreas(item, inp.Cover)
   174  		corpus.saveProgram(inp.Prog, inp.Signal)
   175  	}
   176  	corpus.signal.Merge(inp.Signal)
   177  	newCover := corpus.cover.MergeDiff(inp.Cover)
   178  	if corpus.updates != nil {
   179  		select {
   180  		case <-corpus.ctx.Done():
   181  		case corpus.updates <- NewItemEvent{
   182  			Sig:      sig,
   183  			Exists:   exists,
   184  			ProgData: progData,
   185  			NewCover: newCover,
   186  		}:
   187  		}
   188  	}
   189  }
   190  
   191  func (corpus *Corpus) applyFocusAreas(item *Item, coverDelta []uint64) {
   192  	for _, area := range corpus.focusAreas {
   193  		matches := false
   194  		for _, pc := range coverDelta {
   195  			if _, ok := area.CoverPCs[pc]; ok {
   196  				matches = true
   197  				break
   198  			}
   199  		}
   200  		if !matches {
   201  			continue
   202  		}
   203  		area.saveProgram(item.Prog, item.Signal)
   204  		if item.areas == nil {
   205  			item.areas = make(map[*focusAreaState]struct{})
   206  			item.areas[area] = struct{}{}
   207  		}
   208  	}
   209  }
   210  
   211  func (corpus *Corpus) Signal() signal.Signal {
   212  	corpus.mu.RLock()
   213  	defer corpus.mu.RUnlock()
   214  	return corpus.signal.Copy()
   215  }
   216  
   217  func (corpus *Corpus) Items() []*Item {
   218  	corpus.mu.RLock()
   219  	defer corpus.mu.RUnlock()
   220  	ret := make([]*Item, 0, len(corpus.progsMap))
   221  	for _, item := range corpus.progsMap {
   222  		ret = append(ret, item)
   223  	}
   224  	return ret
   225  }
   226  
   227  func (corpus *Corpus) Item(sig string) *Item {
   228  	corpus.mu.RLock()
   229  	defer corpus.mu.RUnlock()
   230  	return corpus.progsMap[sig]
   231  }
   232  
// CallCov aggregates the corpus items that share the same StringCall value;
// it is the value type of the map returned by Corpus.CallCover.
type CallCov struct {
	Count int         // number of corpus items with this call
	Cover cover.Cover // coverage merged over those items
}
   237  
   238  func (corpus *Corpus) CallCover() map[string]*CallCov {
   239  	corpus.mu.RLock()
   240  	defer corpus.mu.RUnlock()
   241  	calls := make(map[string]*CallCov)
   242  	for _, inp := range corpus.progsMap {
   243  		call := inp.StringCall()
   244  		if calls[call] == nil {
   245  			calls[call] = new(CallCov)
   246  		}
   247  		cc := calls[call]
   248  		cc.Count++
   249  		cc.Cover.Merge(inp.Cover)
   250  	}
   251  	return calls
   252  }
   253  
   254  func (corpus *Corpus) ProgsPerArea() map[string]int {
   255  	corpus.mu.RLock()
   256  	defer corpus.mu.RUnlock()
   257  	ret := map[string]int{}
   258  	for _, item := range corpus.focusAreas {
   259  		ret[item.Name] = len(item.progs)
   260  	}
   261  	return ret
   262  }
   263  
   264  func (corpus *Corpus) Cover() []uint64 {
   265  	return corpus.cover.Serialize()
   266  }