github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/pkg/covermerger/covermerger.go

// Copyright 2024 syzkaller project authors. All rights reserved.
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.

package covermerger

import (
	"compress/gzip"
	"context"
	"encoding/csv"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"slices"
	"strconv"

	"github.com/google/syzkaller/pkg/coveragedb"
	"github.com/google/syzkaller/pkg/log"
	"golang.org/x/exp/maps"
	"golang.org/x/sync/errgroup"
)

const (
	KeyKernelRepo   = "kernel_repo"
	KeyKernelCommit = "kernel_commit"
	KeyFilePath     = "file_path"
	KeyFuncName     = "func_name"
	KeyStartLine    = "sl"
	KeyHitCount     = "hit_count"
	KeyManager      = "manager"
)

type FileRecord struct {
	FilePath string
	FuncName string
	RepoCommit
	StartLine int
	HitCount  int
	Manager   string
}

type RepoCommit struct {
	Repo   string
	Commit string
}

type MergeResult struct {
	HitCounts   map[int]int64
	FileExists  bool
	LineDetails map[int][]*FileRecord
}

type FileCoverageMerger interface {
	Add(record *FileRecord)
	Result() *MergeResult
}

// MergeCSVWriteJSONL merges the input CSV and generates JSONL records.
// The number of lines generated is [count(managers)+1] * [count(kernel_files)].
// Returns (totalInstrumentedLines, totalCoveredLines, error).
func MergeCSVWriteJSONL(config *Config, descr *coveragedb.HistoryRecord, csvReader io.Reader, w io.Writer,
) (int, int, error) {
	eg, c := errgroup.WithContext(context.Background())
	mergeResults := make(chan *FileMergeResult)
	eg.Go(func() error {
		defer close(mergeResults)
		if err := MergeCSVData(c, config, csvReader, mergeResults); err != nil {
			return fmt.Errorf("covermerger.MergeCSVData: %w", err)
		}
		return nil
	})
	var totalInstrumentedLines, totalCoveredLines int
	eg.Go(func() error {
		var encoder *json.Encoder
		if w != nil {
			gzw := gzip.NewWriter(w)
			defer gzw.Close()
			encoder = json.NewEncoder(gzw)
		}
		if encoder != nil {
			if err := encoder.Encode(descr); err != nil {
				return fmt.Errorf("encoder.Encode(MergedCoverageDescription): %w", err)
			}
		}
		for fileMergeResult := range mergeResults {
			dashCoverageRecords, dashFuncLines := mergedCoverageRecords(fileMergeResult)
			if encoder != nil {
				for _, record := range dashFuncLines {
					if err := encoder.Encode(coveragedb.JSONLWrapper{FL: record}); err != nil {
						return fmt.Errorf("encoder.Encode(FuncLines): %w", err)
					}
				}
				for _, record := range dashCoverageRecords {
					if err := encoder.Encode(coveragedb.JSONLWrapper{MCR: record}); err != nil {
						return fmt.Errorf("encoder.Encode(MergedCoverageRecord): %w", err)
					}
				}
			}
			for _, hitCount := range fileMergeResult.HitCounts {
				totalInstrumentedLines++
				if hitCount > 0 {
					totalCoveredLines++
				}
			}
		}
		return nil
	})
	if err := eg.Wait(); err != nil {
		return 0, 0, fmt.Errorf("eg.Wait: %w", err)
	}
	return totalInstrumentedLines, totalCoveredLines, nil
}

const allManagers = "*"
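// mergedCoverageRecords flattens a per-file merge result into dashboard records:
// one MergedCoverageRecord per manager (plus the aggregate "*" pseudo-manager)
// and one FuncLines entry per function name observed in the file.
// It returns nil when the merge result reports the file as nonexistent.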
func mergedCoverageRecords(fmr *FileMergeResult) ([]*coveragedb.MergedCoverageRecord, []*coveragedb.FuncLines) {
	if !fmr.FileExists {
		return nil, nil
	}
	lines := maps.Keys(fmr.HitCounts)
	slices.Sort(lines)
	mgrStat := make(map[string]*coveragedb.Coverage)
	mgrStat[allManagers] = &coveragedb.Coverage{}

	funcLines := map[string]*coveragedb.FuncLines{}
	for _, line := range lines {
		mgrStat[allManagers].AddLineHitCount(line, fmr.HitCounts[line])
		managerHitCounts := map[string]int64{}
		var srcFuncs []string
		for _, lineDetail := range fmr.LineDetails[line] {
			srcFuncs = append(srcFuncs, lineDetail.FuncName)
			manager := lineDetail.Manager
			managerHitCounts[manager] += int64(lineDetail.HitCount)
		}
		if funcName := bestFuncName(srcFuncs); funcName != "" {
			if _, ok := funcLines[funcName]; !ok {
				funcLines[funcName] = &coveragedb.FuncLines{
					FilePath: fmr.FilePath,
					FuncName: funcName,
				}
			}
			funcLines[funcName].Lines = append(funcLines[funcName].Lines, int64(line))
		}
		for manager, managerHitCount := range managerHitCounts {
			if _, ok := mgrStat[manager]; !ok {
				mgrStat[manager] = &coveragedb.Coverage{}
			}
			mgrStat[manager].AddLineHitCount(line, managerHitCount)
		}
	}

	res := []*coveragedb.MergedCoverageRecord{}
	for managerName, managerCoverage := range mgrStat {
		res = append(res, &coveragedb.MergedCoverageRecord{
			Manager:  managerName,
			FilePath: fmr.FilePath,
			FileData: managerCoverage,
		})
	}
	return res, maps.Values(funcLines)
}

// bestFuncName selects the most frequent function name from the list of candidates.
// If a function was renamed during the collection period, we have to pick one name to display the coverage.
//
// A better alternative would be to take the function name from the C code itself, but that looks more complex for now.
func bestFuncName(names []string) string {
	stat := map[string]int{}
	for _, name := range names {
		stat[name]++
	}
	bestName := ""
	bestCount := 0
	for name, count := range stat {
		if name != "" && count > bestCount {
			bestName = name
			bestCount = count
		}
	}
	return bestName
}
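// batchFileData merges all records that belong to a single kernel file.
// It collects every (repo, commit) pair seen in the records (plus the configured base),
// fetches the matching file versions, and feeds the records to a line cover merger
// built for the base revision.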
func batchFileData(c *Config, targetFilePath string, records []*FileRecord) (*MergeResult, error) {
	log.Logf(1, "processing %d records for %s", len(records), targetFilePath)
	repoCommitsMap := make(map[RepoCommit]bool)
	for _, record := range records {
		repoCommitsMap[record.RepoCommit] = true
	}
	repoCommitsMap[c.Base] = true
	repoCommits := maps.Keys(repoCommitsMap)
	fvs, err := c.FileVersProvider.GetFileVersions(targetFilePath, repoCommits...)
	if err != nil {
		return nil, fmt.Errorf("failed to getFileVersions: %w", err)
	}
	merger := makeFileLineCoverMerger(fvs, c.Base)
	for _, record := range records {
		merger.Add(record)
	}
	return merger.Result(), nil
}

// makeRecord maps one CSV row onto a FileRecord using the column order given by schema.
func makeRecord(fields, schema []string) (*FileRecord, error) {
	if len(fields) != len(schema) {
		return nil, errors.New("fields size and schema size are not equal")
	}
	record := &FileRecord{}
	for i, val := range fields {
		key := schema[i]
		var err error
		switch key {
		case KeyFilePath:
			record.FilePath = val
		case KeyFuncName:
			record.FuncName = val
		case KeyKernelRepo:
			record.Repo = val
		case KeyKernelCommit:
			record.Commit = val
		case KeyStartLine:
			record.StartLine, err = readIntField(key, val)
		case KeyHitCount:
			record.HitCount, err = readIntField(key, val)
		case KeyManager:
			record.Manager = val
		}
		if err != nil {
			return nil, err
		}
	}
	return record, nil
}

func readIntField(field, val string) (int, error) {
	res, err := strconv.Atoi(val)
	if err != nil {
		return -1, fmt.Errorf("failed to Atoi(%s) %s: %w", val, field, err)
	}
	return res, nil
}

type Config struct {
	Jobs             int
	Workdir          string
	skipRepoClone    bool
	Base             RepoCommit
	FileVersProvider FileVersProvider
}

// isSchema reports whether fields is a repetition of the schema header line.
func isSchema(fields, schema []string) bool {
	if len(fields) != len(schema) {
		return false
	}
	for i := 0; i < len(fields); i++ {
		if fields[i] != schema[i] {
			return false
		}
	}
	return true
}

type FileMergeResult struct {
	FilePath string
	*MergeResult
}
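// MergeCSVData reads coverage records from the CSV reader, groups them by file path
// and streams one FileMergeResult per file to the results channel.
// The first CSV line is treated as the schema; repeated schema lines
// (present when several CSV files were concatenated) are skipped.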
func MergeCSVData(c context.Context, config *Config, reader io.Reader, results chan<- *FileMergeResult) error {
	var schema []string
	csvReader := csv.NewReader(reader)
	if fields, err := csvReader.Read(); err != nil {
		return fmt.Errorf("failed to read schema: %w", err)
	} else {
		schema = fields
	}
	errStreamChan := make(chan error, 2)
	recordsChan := make(chan *FileRecord)
	go func() {
		defer close(recordsChan)
		defer func() { errStreamChan <- nil }()
		for {
			fields, err := csvReader.Read()
			if err == io.EOF {
				break
			}
			if err != nil {
				errStreamChan <- fmt.Errorf("failed to read CSV line: %w", err)
				return
			}
			if isSchema(fields, schema) {
				// The input may be merged CSV files with multiple schema lines.
				continue
			}
			record, err := makeRecord(fields, schema)
			if err != nil {
				errStreamChan <- fmt.Errorf("makeRecord: %w", err)
				return
			}
			select {
			case <-c.Done():
				return
			case recordsChan <- record:
			}
		}
	}()
	errMerging := mergeChanData(c, config, recordsChan, results)
	errStream := <-errStreamChan
	if errMerging != nil || errStream != nil {
		return fmt.Errorf("errors merging stream data:\nmerger err: %w\nstream reader err: %w",
			errMerging, errStream)
	}
	return nil
}

// FileRecords is a batch of records that belong to a single source file.
type FileRecords struct {
	fileName string
	records  []*FileRecord
}

// mergeChanData runs cfg.Jobs worker goroutines that merge the per-file record
// batches and forward the results until the input is drained or the context is cancelled.
func mergeChanData(c context.Context, cfg *Config, recordChan <-chan *FileRecord, results chan<- *FileMergeResult,
) error {
	g := errgroup.Group{}
	frecordChan := groupFileRecords(recordChan, c)

	for i := 0; i < cfg.Jobs; i++ {
		g.Go(func() error {
			for frecord := range frecordChan {
				mr, err := batchFileData(cfg, frecord.fileName, frecord.records)
				if err != nil {
					return fmt.Errorf("failed to batchFileData(%s): %w", frecord.fileName, err)
				}
				select {
				case <-c.Done():
					return nil
				case results <- &FileMergeResult{
					FilePath:    frecord.fileName,
					MergeResult: mr}:
				}
			}
			return nil
		})
	}
	return g.Wait()
}

// groupFileRecords batches incoming records by FilePath. It assumes records for the
// same file arrive contiguously: a change of FilePath flushes the accumulated batch.
func groupFileRecords(recordChan <-chan *FileRecord, ctx context.Context) chan FileRecords {
	frecordChan := make(chan FileRecords)
	go func() {
		defer close(frecordChan)
		targetFile := ""
		var records []*FileRecord
		for record := range recordChan {
			select {
			case <-ctx.Done():
				return
			default:
			}
			curTargetFile := record.FilePath
			if targetFile == "" {
				targetFile = curTargetFile
			}
			if curTargetFile != targetFile {
				select {
				case <-ctx.Done():
					return
				case frecordChan <- FileRecords{
					fileName: targetFile,
					records:  records}:
				}
				records = nil
				targetFile = curTargetFile
			}
			records = append(records, record)
		}
		select {
		case <-ctx.Done():
		case frecordChan <- FileRecords{
			fileName: targetFile,
			records:  records}:
		}
	}()
	return frecordChan
}
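// Illustrative usage sketch (not part of the package; the reader, writer and
// provider names below are placeholders):
//
//	cfg := &Config{
//		Jobs:             4,
//		Base:             RepoCommit{Repo: baseRepo, Commit: baseCommit},
//		FileVersProvider: versProvider, // some FileVersProvider implementation
//	}
//	// csvIn is an io.Reader with the coverage CSV; jsonlOut is an io.Writer that
//	// receives the gzipped JSONL output (a nil writer only computes the totals).
//	instrumented, covered, err := MergeCSVWriteJSONL(cfg, &coveragedb.HistoryRecord{}, csvIn, jsonlOut)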