github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/pkg/covermerger/covermerger.go (about)

     1  // Copyright 2024 syzkaller project authors. All rights reserved.
     2  // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
     3  
     4  package covermerger
     5  
     6  import (
     7  	"compress/gzip"
     8  	"context"
     9  	"encoding/csv"
    10  	"encoding/json"
    11  	"errors"
    12  	"fmt"
    13  	"io"
    14  	"slices"
    15  	"strconv"
    16  
    17  	"github.com/google/syzkaller/pkg/coveragedb"
    18  	"github.com/google/syzkaller/pkg/log"
    19  	"golang.org/x/exp/maps"
    20  	"golang.org/x/sync/errgroup"
    21  )
    22  
// Column names of the input coverage CSV schema; makeRecord maps each
// schema column onto the corresponding FileRecord field.
const (
	KeyKernelRepo   = "kernel_repo"
	KeyKernelCommit = "kernel_commit"
	KeyFilePath     = "file_path"
	KeyFuncName     = "func_name"
	KeyStartLine    = "sl"
	KeyHitCount     = "hit_count"
	KeyManager      = "manager"
)
    32  
// FileRecord is a single input coverage row: the hit count one manager
// reported for one line of a file at a specific kernel repo/commit.
type FileRecord struct {
	FilePath string
	FuncName string
	RepoCommit
	StartLine int
	HitCount  int
	Manager   string
}
    41  
// RepoCommit identifies a kernel source revision: a repository and a
// commit within it.
type RepoCommit struct {
	Repo   string
	Commit string
}
    46  
// MergeResult is the merged coverage of a single file, projected onto the
// base file version.
type MergeResult struct {
	// HitCounts maps a line number to its total (summed) hit count.
	HitCounts map[int]int64
	// FileExists is false when the file could not be merged (e.g. it is
	// absent in the base revision) — presumably; confirm in the merger impl.
	FileExists bool
	// LineDetails maps a line number to the input records that
	// contributed to it.
	LineDetails map[int][]*FileRecord
}
    52  
// FileCoverageMerger accumulates coverage records for a single file and
// produces the merged result.
type FileCoverageMerger interface {
	// Add folds one input record into the merge.
	Add(record *FileRecord)
	// Result returns the accumulated merge outcome.
	Result() *MergeResult
}
    57  
    58  // MergeCSVWriteJSONL mergers input CSV and generates JSONL records.
    59  // The amount of lines generated is [count(managers)+1] * [count(kernel_files)].
    60  // Returns (totalInstrumentedLines, totalCoveredLines, error).
    61  func MergeCSVWriteJSONL(config *Config, descr *coveragedb.HistoryRecord, csvReader io.Reader, w io.Writer,
    62  ) (int, int, error) {
    63  	eg, c := errgroup.WithContext(context.Background())
    64  	mergeResults := make(chan *FileMergeResult)
    65  	eg.Go(func() error {
    66  		defer close(mergeResults)
    67  		if err := MergeCSVData(c, config, csvReader, mergeResults); err != nil {
    68  			return fmt.Errorf("covermerger.MergeCSVData: %w", err)
    69  		}
    70  		return nil
    71  	})
    72  	var totalInstrumentedLines, totalCoveredLines int
    73  	eg.Go(func() error {
    74  		var encoder *json.Encoder
    75  		if w != nil {
    76  			gzw := gzip.NewWriter(w)
    77  			defer gzw.Close()
    78  			encoder = json.NewEncoder(gzw)
    79  		}
    80  		if encoder != nil {
    81  			if err := encoder.Encode(descr); err != nil {
    82  				return fmt.Errorf("encoder.Encode(MergedCoverageDescription): %w", err)
    83  			}
    84  		}
    85  		for fileMergeResult := range mergeResults {
    86  			dashCoverageRecords, dashFuncLines := mergedCoverageRecords(fileMergeResult)
    87  			if encoder != nil {
    88  				for _, record := range dashFuncLines {
    89  					if err := encoder.Encode(coveragedb.JSONLWrapper{FL: record}); err != nil {
    90  						return fmt.Errorf("encoder.Encode(FuncLines): %w", err)
    91  					}
    92  				}
    93  				for _, record := range dashCoverageRecords {
    94  					if err := encoder.Encode(coveragedb.JSONLWrapper{MCR: record}); err != nil {
    95  						return fmt.Errorf("encoder.Encode(MergedCoverageRecord): %w", err)
    96  					}
    97  				}
    98  			}
    99  			for _, hitCount := range fileMergeResult.HitCounts {
   100  				totalInstrumentedLines++
   101  				if hitCount > 0 {
   102  					totalCoveredLines++
   103  				}
   104  			}
   105  		}
   106  		return nil
   107  	})
   108  	if err := eg.Wait(); err != nil {
   109  		return 0, 0, fmt.Errorf("eg.Wait: %w", err)
   110  	}
   111  	return totalInstrumentedLines, totalCoveredLines, nil
   112  }
   113  
// allManagers is the pseudo-manager name under which the coverage
// aggregated over all managers is reported.
const allManagers = "*"
   115  
   116  func mergedCoverageRecords(fmr *FileMergeResult) ([]*coveragedb.MergedCoverageRecord, []*coveragedb.FuncLines) {
   117  	if !fmr.FileExists {
   118  		return nil, nil
   119  	}
   120  	lines := maps.Keys(fmr.HitCounts)
   121  	slices.Sort(lines)
   122  	mgrStat := make(map[string]*coveragedb.Coverage)
   123  	mgrStat[allManagers] = &coveragedb.Coverage{}
   124  
   125  	funcLines := map[string]*coveragedb.FuncLines{}
   126  	for _, line := range lines {
   127  		mgrStat[allManagers].AddLineHitCount(line, fmr.HitCounts[line])
   128  		managerHitCounts := map[string]int64{}
   129  		var srcFuncs []string
   130  		for _, lineDetail := range fmr.LineDetails[line] {
   131  			srcFuncs = append(srcFuncs, lineDetail.FuncName)
   132  			manager := lineDetail.Manager
   133  			managerHitCounts[manager] += int64(lineDetail.HitCount)
   134  		}
   135  		if funcName := bestFuncName(srcFuncs); funcName != "" {
   136  			if _, ok := funcLines[funcName]; !ok {
   137  				funcLines[funcName] = &coveragedb.FuncLines{
   138  					FilePath: fmr.FilePath,
   139  					FuncName: funcName,
   140  				}
   141  			}
   142  			funcLines[funcName].Lines = append(funcLines[funcName].Lines, int64(line))
   143  		}
   144  		for manager, managerHitCount := range managerHitCounts {
   145  			if _, ok := mgrStat[manager]; !ok {
   146  				mgrStat[manager] = &coveragedb.Coverage{}
   147  			}
   148  			mgrStat[manager].AddLineHitCount(line, managerHitCount)
   149  		}
   150  	}
   151  
   152  	res := []*coveragedb.MergedCoverageRecord{}
   153  	for managerName, managerCoverage := range mgrStat {
   154  		res = append(res, &coveragedb.MergedCoverageRecord{
   155  			Manager:  managerName,
   156  			FilePath: fmr.FilePath,
   157  			FileData: managerCoverage,
   158  		})
   159  	}
   160  	return res, maps.Values(funcLines)
   161  }
   162  
   163  // bestFuncName selects the most frequent function from the list of candidates.
   164  // If a function was renamed during the collection period, we have to pick one name to display the coverage.
   165  //
   166  // The better alternative is to get the function name from the C code. But it looks more complex for now.
   167  func bestFuncName(names []string) string {
   168  	stat := map[string]int{}
   169  	for _, name := range names {
   170  		stat[name]++
   171  	}
   172  	bestName := ""
   173  	bestCount := 0
   174  	for name, count := range stat {
   175  		if name != "" && count > bestCount {
   176  			bestName = name
   177  			bestCount = count
   178  		}
   179  	}
   180  	return bestName
   181  }
   182  
   183  func batchFileData(c *Config, targetFilePath string, records []*FileRecord) (*MergeResult, error) {
   184  	log.Logf(1, "processing %d records for %s", len(records), targetFilePath)
   185  	repoCommitsMap := make(map[RepoCommit]bool)
   186  	for _, record := range records {
   187  		repoCommitsMap[record.RepoCommit] = true
   188  	}
   189  	repoCommitsMap[c.Base] = true
   190  	repoCommits := maps.Keys(repoCommitsMap)
   191  	fvs, err := c.FileVersProvider.GetFileVersions(targetFilePath, repoCommits...)
   192  	if err != nil {
   193  		return nil, fmt.Errorf("failed to getFileVersions: %w", err)
   194  	}
   195  	merger := makeFileLineCoverMerger(fvs, c.Base)
   196  	for _, record := range records {
   197  		merger.Add(record)
   198  	}
   199  	return merger.Result(), nil
   200  }
   201  
   202  func makeRecord(fields, schema []string) (*FileRecord, error) {
   203  	if len(fields) != len(schema) {
   204  		return nil, errors.New("fields size and schema size are not equal")
   205  	}
   206  	record := &FileRecord{}
   207  	for i, val := range fields {
   208  		key := schema[i]
   209  		var err error
   210  		switch key {
   211  		case KeyFilePath:
   212  			record.FilePath = val
   213  		case KeyFuncName:
   214  			record.FuncName = val
   215  		case KeyKernelRepo:
   216  			record.Repo = val
   217  		case KeyKernelCommit:
   218  			record.Commit = val
   219  		case KeyStartLine:
   220  			record.StartLine, err = readIntField(key, val)
   221  		case KeyHitCount:
   222  			record.HitCount, err = readIntField(key, val)
   223  		case KeyManager:
   224  			record.Manager = val
   225  		}
   226  		if err != nil {
   227  			return nil, err
   228  		}
   229  	}
   230  	return record, nil
   231  }
   232  
   233  func readIntField(field, val string) (int, error) {
   234  	res, err := strconv.Atoi(val)
   235  	if err != nil {
   236  		return -1, fmt.Errorf("failed to Atoi(%s) %s: %w", val, field, err)
   237  	}
   238  	return res, nil
   239  }
   240  
// Config controls the merging process.
type Config struct {
	// Jobs is the number of files merged in parallel by mergeChanData.
	Jobs int
	// Workdir is a scratch directory — presumably for repo checkouts;
	// confirm against the FileVersProvider implementation.
	Workdir string
	// skipRepoClone disables repository cloning — NOTE(review): looks like
	// a test-only knob; confirm at the usage sites.
	skipRepoClone bool
	// Base is the repo/commit every record is projected onto.
	Base RepoCommit
	// FileVersProvider fetches file contents for the needed revisions.
	FileVersProvider FileVersProvider
}
   248  
   249  func isSchema(fields, schema []string) bool {
   250  	if len(fields) != len(schema) {
   251  		return false
   252  	}
   253  	for i := 0; i < len(fields); i++ {
   254  		if fields[i] != schema[i] {
   255  			return false
   256  		}
   257  	}
   258  	return true
   259  }
   260  
// FileMergeResult couples a MergeResult with the path of the file it
// describes; it is the unit streamed over the results channel.
type FileMergeResult struct {
	FilePath string
	*MergeResult
}
   265  
// MergeCSVData streams the coverage CSV from reader, merges it per file and
// sends the per-file results to the results channel. The first row is taken
// as the schema; repeated schema rows (from concatenated CSV files) are
// skipped. The caller is responsible for closing the results channel.
func MergeCSVData(c context.Context, config *Config, reader io.Reader, results chan<- *FileMergeResult) error {
	var schema []string
	csvReader := csv.NewReader(reader)
	if fields, err := csvReader.Read(); err != nil {
		return fmt.Errorf("failed to read schema: %w", err)
	} else {
		schema = fields
	}
	// Buffered so the reader goroutine never blocks on error reporting:
	// it sends at most one error plus the deferred nil.
	errStreamChan := make(chan error, 2)
	recordsChan := make(chan *FileRecord)
	go func() {
		defer close(recordsChan)
		defer func() { errStreamChan <- nil }()
		for {
			fields, err := csvReader.Read()
			if err == io.EOF {
				break
			}
			if err != nil {
				errStreamChan <- fmt.Errorf("failed to read CSV line: %w", err)
				return
			}
			if isSchema(fields, schema) {
				// The input may be the merged CSV files with multiple schemas.
				continue
			}
			record, err := makeRecord(fields, schema)
			if err != nil {
				errStreamChan <- fmt.Errorf("makeRecord: %w", err)
				return
			}
			select {
			case <-c.Done():
				return
			case recordsChan <- record:
			}
		}
	}()
	errMerging := mergeChanData(c, config, recordsChan, results)
	// The first value received is either the reader error or the deferred
	// nil sent when the reader goroutine exits.
	errStream := <-errStreamChan
	if errMerging != nil || errStream != nil {
		return fmt.Errorf("errors merging stream data:\nmerger err: %w\nstream reader err: %w",
			errMerging, errStream)
	}
	return nil
}
   312  
// FileRecords groups all input records that belong to one file; see
// groupFileRecords.
type FileRecords struct {
	fileName string
	records  []*FileRecord
}
   317  
   318  func mergeChanData(c context.Context, cfg *Config, recordChan <-chan *FileRecord, results chan<- *FileMergeResult,
   319  ) error {
   320  	g := errgroup.Group{}
   321  	frecordChan := groupFileRecords(recordChan, c)
   322  
   323  	for i := 0; i < cfg.Jobs; i++ {
   324  		g.Go(func() error {
   325  			for frecord := range frecordChan {
   326  				mr, err := batchFileData(cfg, frecord.fileName, frecord.records)
   327  				if err != nil {
   328  					return fmt.Errorf("failed to batchFileData(%s): %w", frecord.fileName, err)
   329  				}
   330  				select {
   331  				case <-c.Done():
   332  					return nil
   333  				case results <- &FileMergeResult{
   334  					FilePath:    frecord.fileName,
   335  					MergeResult: mr}:
   336  				}
   337  			}
   338  			return nil
   339  		})
   340  	}
   341  	return g.Wait()
   342  }
   343  
   344  func groupFileRecords(recordChan <-chan *FileRecord, ctx context.Context) chan FileRecords {
   345  	frecordChan := make(chan FileRecords)
   346  	go func() {
   347  		defer close(frecordChan)
   348  		targetFile := ""
   349  		var records []*FileRecord
   350  		for record := range recordChan {
   351  			select {
   352  			case <-ctx.Done():
   353  				return
   354  			default:
   355  			}
   356  			curTargetFile := record.FilePath
   357  			if targetFile == "" {
   358  				targetFile = curTargetFile
   359  			}
   360  			if curTargetFile != targetFile {
   361  				select {
   362  				case <-ctx.Done():
   363  					return
   364  				case frecordChan <- FileRecords{
   365  					fileName: targetFile,
   366  					records:  records}:
   367  				}
   368  				records = nil
   369  				targetFile = curTargetFile
   370  			}
   371  			records = append(records, record)
   372  		}
   373  		select {
   374  		case <-ctx.Done():
   375  		case frecordChan <- FileRecords{
   376  			fileName: targetFile,
   377  			records:  records}:
   378  		}
   379  	}()
   380  	return frecordChan
   381  }