github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/pkg/coveragedb/coveragedb.go (about) 1 // Copyright 2024 syzkaller project authors. All rights reserved. 2 // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 3 4 package coveragedb 5 6 import ( 7 "context" 8 "encoding/json" 9 "errors" 10 "fmt" 11 "io" 12 "maps" 13 "sync/atomic" 14 "time" 15 16 "cloud.google.com/go/civil" 17 "cloud.google.com/go/spanner" 18 "github.com/google/syzkaller/pkg/coveragedb/spannerclient" 19 "github.com/google/syzkaller/pkg/subsystem" 20 _ "github.com/google/syzkaller/pkg/subsystem/lists" 21 "github.com/google/uuid" 22 "golang.org/x/sync/errgroup" 23 "google.golang.org/api/iterator" 24 ) 25 26 type HistoryRecord struct { 27 Session string 28 Time time.Time 29 Namespace string 30 Repo string 31 Commit string 32 Duration int64 33 DateTo civil.Date 34 TotalRows int64 35 } 36 37 type MergedCoverageRecord struct { 38 Manager string 39 FilePath string 40 FileData *Coverage 41 } 42 43 type JSONLWrapper struct { 44 MCR *MergedCoverageRecord 45 FL *FuncLines 46 } 47 48 type Coverage struct { 49 Instrumented int64 50 Covered int64 51 LinesInstrumented []int64 52 HitCounts []int64 53 } 54 55 func (c *Coverage) AddLineHitCount(line int, hitCount int64) { 56 c.Instrumented++ 57 c.LinesInstrumented = append(c.LinesInstrumented, int64(line)) 58 c.HitCounts = append(c.HitCounts, hitCount) 59 if hitCount > 0 { 60 c.Covered++ 61 } 62 } 63 64 type filesRecord struct { 65 Session string 66 FilePath string 67 Instrumented int64 68 Covered int64 69 LinesInstrumented []int64 70 HitCounts []int64 71 Manager string // "*" means "collected from all managers" 72 } 73 74 type functionsRecord struct { 75 Session string 76 FilePath string 77 FuncName string 78 Lines []int64 79 } 80 81 type fileSubsystems struct { 82 Namespace string 83 FilePath string 84 Subsystems []string 85 } 86 87 func SaveMergeResult(ctx context.Context, client spannerclient.SpannerClient, descr *HistoryRecord, dec *json.Decoder, 88 ) (int, error) { 89 if client == nil { 90 return 0, fmt.Errorf("nil spannerclient") 91 } 92 var rowsCreated int 93 session := uuid.New().String() 94 var mutations []*spanner.Mutation 95 96 for { 97 var wr JSONLWrapper 98 err := dec.Decode(&wr) 99 if err == io.EOF { 100 break 101 } 102 if err != nil { 103 return rowsCreated, fmt.Errorf("dec.Decode(MergedCoverageRecord): %w", err) 104 } 105 if mcr := wr.MCR; mcr != nil { 106 mutations = append(mutations, fileRecordMutation(session, mcr)) 107 } else if fl := wr.FL; fl != nil { 108 mutations = append(mutations, fileFunctionsMutation(session, fl)) 109 } else { 110 return rowsCreated, errors.New("JSONLWrapper can't be empty") 111 } 112 // There is a limit on the number of mutations per transaction (80k) imposed by the DB. 113 // This includes both explicit mutations of the fields (6 fields * 1k records = 6k mutations) 114 // and implicit index mutations. 115 // We keep the number of records low enough for the number of explicit mutations * 10 does not exceed the limit. 116 if len(mutations) >= 1000 { 117 if _, err := client.Apply(ctx, mutations); err != nil { 118 return rowsCreated, fmt.Errorf("failed to spanner.Apply(inserts): %s", err.Error()) 119 } 120 rowsCreated += len(mutations) 121 mutations = nil 122 } 123 } 124 125 mutations = append(mutations, historyMutation(session, descr)) 126 if _, err := client.Apply(ctx, mutations); err != nil { 127 return rowsCreated, fmt.Errorf("failed to spanner.Apply(inserts): %s", err.Error()) 128 } 129 rowsCreated += len(mutations) 130 return rowsCreated, nil 131 } 132 133 type LinesCoverage struct { 134 LinesInstrumented []int64 135 HitCounts []int64 136 } 137 138 func linesCoverageStmt(ns, filepath, commit, manager string, timePeriod TimePeriod) spanner.Statement { 139 if manager == "" { 140 manager = "*" 141 } 142 return spanner.Statement{ 143 SQL: ` 144 select 145 linesinstrumented, 146 hitcounts 147 from merge_history 148 join files 149 on merge_history.session = files.session 150 where 151 namespace=$1 and dateto=$2 and duration=$3 and filepath=$4 and commit=$5 and manager=$6`, 152 Params: map[string]interface{}{ 153 "p1": ns, 154 "p2": timePeriod.DateTo, 155 "p3": timePeriod.Days, 156 "p4": filepath, 157 "p5": commit, 158 "p6": manager, 159 }, 160 } 161 } 162 163 func ReadLinesHitCount(ctx context.Context, client spannerclient.SpannerClient, 164 ns, commit, file, manager string, tp TimePeriod, 165 ) ([]int64, []int64, error) { 166 stmt := linesCoverageStmt(ns, file, commit, manager, tp) 167 iter := client.Single().Query(ctx, stmt) 168 defer iter.Stop() 169 170 row, err := iter.Next() 171 if err == iterator.Done { 172 return nil, nil, nil 173 } 174 if err != nil { 175 return nil, nil, fmt.Errorf("iter.Next: %w", err) 176 } 177 var r LinesCoverage 178 if err = row.ToStruct(&r); err != nil { 179 return nil, nil, fmt.Errorf("failed to row.ToStruct() spanner DB: %w", err) 180 } 181 if _, err := iter.Next(); err != iterator.Done { 182 return nil, nil, fmt.Errorf("more than 1 line is available") 183 } 184 return r.LinesInstrumented, r.HitCounts, nil 185 } 186 187 func historyMutation(session string, template *HistoryRecord) *spanner.Mutation { 188 historyInsert, err := spanner.InsertOrUpdateStruct("merge_history", &HistoryRecord{ 189 Session: session, 190 Time: time.Now(), 191 Namespace: template.Namespace, 192 Repo: template.Repo, 193 Commit: template.Commit, 194 Duration: template.Duration, 195 DateTo: template.DateTo, 196 TotalRows: template.TotalRows, 197 }) 198 if err != nil { 199 panic(fmt.Sprintf("failed to spanner.InsertStruct(): %s", err.Error())) 200 } 201 return historyInsert 202 } 203 204 func fileFunctionsMutation(session string, fl *FuncLines) *spanner.Mutation { 205 insert, err := spanner.InsertOrUpdateStruct("functions", &functionsRecord{ 206 Session: session, 207 FilePath: fl.FilePath, 208 FuncName: fl.FuncName, 209 Lines: fl.Lines, 210 }) 211 if err != nil { 212 panic(fmt.Sprintf("failed to fileFunctionsMutation: %v", err)) 213 } 214 return insert 215 } 216 217 func fileRecordMutation(session string, mcr *MergedCoverageRecord) *spanner.Mutation { 218 insert, err := spanner.InsertOrUpdateStruct("files", &filesRecord{ 219 Session: session, 220 FilePath: mcr.FilePath, 221 Instrumented: mcr.FileData.Instrumented, 222 Covered: mcr.FileData.Covered, 223 LinesInstrumented: mcr.FileData.LinesInstrumented, 224 HitCounts: mcr.FileData.HitCounts, 225 Manager: mcr.Manager, 226 }) 227 if err != nil { 228 panic(fmt.Sprintf("failed to fileRecordMutation: %v", err)) 229 } 230 return insert 231 } 232 233 func fileSubsystemsMutation(ns, filePath string, subsystems []string) *spanner.Mutation { 234 insert, err := spanner.InsertOrUpdateStruct("file_subsystems", &fileSubsystems{ 235 Namespace: ns, 236 FilePath: filePath, 237 Subsystems: subsystems, 238 }) 239 if err != nil { 240 panic(fmt.Sprintf("failed to fileSubsystemsMutation(): %s", err.Error())) 241 } 242 return insert 243 } 244 245 func getFileSubsystems(filePath string, ssMatcher *subsystem.PathMatcher, ssCache map[string][]string) []string { 246 sss, cached := ssCache[filePath] 247 if !cached { 248 for _, match := range ssMatcher.Match(filePath) { 249 sss = append(sss, match.Name) 250 } 251 ssCache[filePath] = sss 252 } 253 return sss 254 } 255 256 func NsDataMerged(ctx context.Context, client spannerclient.SpannerClient, ns string, 257 ) ([]TimePeriod, []int64, error) { 258 if client == nil { 259 return nil, nil, fmt.Errorf("nil spannerclient") 260 } 261 stmt := spanner.Statement{ 262 SQL: ` 263 select 264 dateto, 265 duration as days, 266 totalrows 267 from merge_history 268 where 269 namespace=$1`, 270 Params: map[string]interface{}{ 271 "p1": ns, 272 }, 273 } 274 iter := client.Single().Query(ctx, stmt) 275 defer iter.Stop() 276 var periods []TimePeriod 277 var totalRows []int64 278 for { 279 row, err := iter.Next() 280 if err == iterator.Done { 281 break 282 } 283 if err != nil { 284 return nil, nil, fmt.Errorf("failed to iter.Next() spanner DB: %w", err) 285 } 286 var r struct { 287 Days int64 288 DateTo civil.Date 289 TotalRows int64 290 } 291 if err = row.ToStruct(&r); err != nil { 292 return nil, nil, fmt.Errorf("failed to row.ToStruct() spanner DB: %w", err) 293 } 294 periods = append(periods, TimePeriod{DateTo: r.DateTo, Days: int(r.Days)}) 295 totalRows = append(totalRows, r.TotalRows) 296 } 297 return periods, totalRows, nil 298 } 299 300 // DeleteGarbage removes orphaned file entries from the database. 301 // 302 // It identifies files in the "files" table that are not referenced by any entries in the "merge_history" table, 303 // indicating they are no longer associated with an active merge session. 304 // 305 // To avoid exceeding Spanner transaction limits, orphaned files are deleted in batches of 10,000. 306 // Note that in case of an error during batch deletion, some files may be deleted but not counted in the total. 307 // 308 // Returns the number of orphaned file entries successfully deleted. 309 func DeleteGarbage(ctx context.Context, client spannerclient.SpannerClient) (int64, error) { 310 batchSize := 10_000 311 if client == nil { 312 return 0, fmt.Errorf("nil spannerclient") 313 } 314 315 iter := client.Single().Query(ctx, spanner.Statement{ 316 SQL: `SELECT session, filepath 317 FROM files 318 WHERE NOT EXISTS ( 319 SELECT 1 320 FROM merge_history 321 WHERE merge_history.session = files.session 322 )`}) 323 defer iter.Stop() 324 325 var totalDeleted atomic.Int64 326 eg, _ := errgroup.WithContext(ctx) 327 var batch []spanner.Key 328 for { 329 row, err := iter.Next() 330 if err == iterator.Done { 331 break 332 } 333 if err != nil { 334 return 0, fmt.Errorf("iter.Next: %w", err) 335 } 336 var r struct { 337 Session string 338 Filepath string 339 } 340 if err = row.ToStruct(&r); err != nil { 341 return 0, fmt.Errorf("row.ToStruct: %w", err) 342 } 343 batch = append(batch, spanner.Key{r.Session, r.Filepath}) 344 if len(batch) > batchSize { 345 goSpannerDelete(ctx, batch, eg, client, &totalDeleted) 346 batch = nil 347 } 348 } 349 goSpannerDelete(ctx, batch, eg, client, &totalDeleted) 350 if err := eg.Wait(); err != nil { 351 return 0, fmt.Errorf("spanner.Delete: %w", err) 352 } 353 return totalDeleted.Load(), nil 354 } 355 356 func goSpannerDelete(ctx context.Context, batch []spanner.Key, eg *errgroup.Group, client spannerclient.SpannerClient, 357 totalDeleted *atomic.Int64) { 358 ks := spanner.KeySetFromKeys(batch...) 359 ksSize := len(batch) 360 eg.Go(func() error { 361 mutation := spanner.Delete("files", ks) 362 _, err := client.Apply(ctx, []*spanner.Mutation{mutation}) 363 if err == nil { 364 totalDeleted.Add(int64(ksSize)) 365 } 366 return err 367 }) 368 } 369 370 type FileCoverageWithDetails struct { 371 Subsystem string 372 Filepath string 373 Instrumented int64 374 Covered int64 375 TimePeriod TimePeriod `spanner:"-"` 376 Commit string 377 Subsystems []string 378 } 379 380 type FileCoverageWithLineInfo struct { 381 FileCoverageWithDetails 382 LinesInstrumented []int64 383 HitCounts []int64 384 } 385 386 func (fc *FileCoverageWithLineInfo) CovMap() map[int]int64 { 387 return MakeCovMap(fc.LinesInstrumented, fc.HitCounts) 388 } 389 390 func MakeCovMap(keys, vals []int64) map[int]int64 { 391 res := map[int]int64{} 392 for i, key := range keys { 393 res[int(key)] = vals[i] 394 } 395 return res 396 } 397 398 type SelectScope struct { 399 Ns string 400 Subsystem string 401 Manager string 402 Periods []TimePeriod 403 } 404 405 // FilesCoverageStream streams information about all the line coverage. 406 // It is expensive and better to be used for time insensitive operations. 407 func FilesCoverageStream(ctx context.Context, client spannerclient.SpannerClient, scope *SelectScope, 408 ) (<-chan *FileCoverageWithLineInfo, <-chan error) { 409 iter := client.Single().Query(ctx, 410 filesCoverageWithDetailsStmt(scope, true)) 411 resCh := make(chan *FileCoverageWithLineInfo) 412 errCh := make(chan error) 413 go func() { 414 defer close(errCh) 415 defer close(resCh) 416 defer iter.Stop() 417 if err := readIterToChan(ctx, iter, resCh); err != nil { 418 errCh <- fmt.Errorf("readIterToChan: %w", err) 419 } 420 }() 421 return resCh, errCh 422 } 423 424 // FilesCoverageWithDetails fetches the data directly from DB. No caching. 425 // Flag onlyUnique is quite expensive. 426 func FilesCoverageWithDetails( 427 ctx context.Context, client spannerclient.SpannerClient, scope *SelectScope, onlyUnique bool, 428 ) ([]*FileCoverageWithDetails, error) { 429 var res []*FileCoverageWithDetails 430 for _, timePeriod := range scope.Periods { 431 needLinesDetails := onlyUnique 432 iterManager := client.Single().Query(ctx, 433 filesCoverageWithDetailsStmt(&SelectScope{ 434 Ns: scope.Ns, 435 Subsystem: scope.Subsystem, 436 Manager: scope.Manager, 437 Periods: []TimePeriod{timePeriod}, 438 }, needLinesDetails)) 439 defer iterManager.Stop() 440 441 var err error 442 var periodRes []*FileCoverageWithDetails 443 if onlyUnique { 444 iterAll := client.Single().Query(ctx, 445 filesCoverageWithDetailsStmt(&SelectScope{ 446 Ns: scope.Ns, 447 Subsystem: scope.Subsystem, 448 Manager: "", 449 Periods: []TimePeriod{timePeriod}, 450 }, needLinesDetails)) 451 defer iterAll.Stop() 452 periodRes, err = readCoverageUniq(iterAll, iterManager) 453 if err != nil { 454 return nil, fmt.Errorf("uniqueFilesCoverageWithDetails: %w", err) 455 } 456 } else { 457 periodRes, err = readCoverage(ctx, iterManager) 458 if err != nil { 459 return nil, fmt.Errorf("readCoverage: %w", err) 460 } 461 } 462 for _, r := range periodRes { 463 r.TimePeriod = timePeriod 464 } 465 res = append(res, periodRes...) 466 } 467 return res, nil 468 } 469 470 func filesCoverageWithDetailsStmt(scope *SelectScope, withLines bool) spanner.Statement { 471 manager := scope.Manager 472 if manager == "" { 473 manager = "*" 474 } 475 selectColumns := "commit, instrumented, covered, files.filepath, subsystems" 476 if withLines { 477 selectColumns += ", linesinstrumented, hitcounts" 478 } 479 stmt := spanner.Statement{ 480 SQL: "select " + selectColumns + ` 481 from merge_history 482 join files 483 on merge_history.session = files.session 484 join file_subsystems 485 on merge_history.namespace = file_subsystems.namespace and files.filepath = file_subsystems.filepath 486 where 487 merge_history.namespace=$1 and dateto=$2 and duration=$3 and manager=$4`, 488 Params: map[string]interface{}{ 489 "p1": scope.Ns, 490 "p2": scope.Periods[0].DateTo, 491 "p3": scope.Periods[0].Days, 492 "p4": manager, 493 }, 494 } 495 if scope.Subsystem != "" { 496 stmt.SQL += " and $5=ANY(subsystems)" 497 stmt.Params["p5"] = scope.Subsystem 498 } 499 stmt.SQL += "\norder by files.filepath" 500 return stmt 501 } 502 503 func readCoverage(ctx context.Context, iterManager spannerclient.RowIterator) ([]*FileCoverageWithDetails, error) { 504 res := []*FileCoverageWithDetails{} 505 ch := make(chan *FileCoverageWithDetails) 506 var err error 507 go func() { 508 defer close(ch) 509 err = readIterToChan(ctx, iterManager, ch) 510 }() 511 for fc := range ch { 512 res = append(res, fc) 513 } 514 if err != nil { 515 return nil, fmt.Errorf("readIterToChan: %w", err) 516 } 517 return res, nil 518 } 519 520 // Unique coverage from specific manager is more expensive to get. 521 // We get unique coverage comparing manager and total coverage on the AppEngine side. 522 func readCoverageUniq(full, mgr spannerclient.RowIterator, 523 ) ([]*FileCoverageWithDetails, error) { 524 eg, ctx := errgroup.WithContext(context.Background()) 525 fullCh := make(chan *FileCoverageWithLineInfo) 526 eg.Go(func() error { 527 defer close(fullCh) 528 return readIterToChan(ctx, full, fullCh) 529 }) 530 partCh := make(chan *FileCoverageWithLineInfo) 531 eg.Go(func() error { 532 defer close(partCh) 533 return readIterToChan(ctx, mgr, partCh) 534 }) 535 res := []*FileCoverageWithDetails{} 536 eg.Go(func() error { 537 partCov := <-partCh 538 for fullCov := range fullCh { 539 if partCov == nil || partCov.Filepath > fullCov.Filepath { 540 // No pair for the file in full aggregation is available. 541 cov := fullCov.FileCoverageWithDetails 542 cov.Covered = 0 543 res = append(res, &cov) 544 continue 545 } 546 if partCov.Filepath == fullCov.Filepath { 547 if partCov.Commit != fullCov.Commit || 548 !IsComparable( 549 fullCov.LinesInstrumented, fullCov.HitCounts, 550 partCov.LinesInstrumented, partCov.HitCounts) { 551 return fmt.Errorf("db record for file %s doesn't match", fullCov.Filepath) 552 } 553 resItem := fullCov.FileCoverageWithDetails // Use Instrumented count from full aggregation. 554 resItem.Covered = 0 555 for _, hc := range UniqCoverage(fullCov.CovMap(), partCov.CovMap()) { 556 if hc > 0 { 557 resItem.Covered++ 558 } 559 } 560 res = append(res, &resItem) 561 partCov = <-partCh 562 continue 563 } 564 // Partial coverage is a subset of full coverage. 565 // File can't exist only in partial set. 566 return fmt.Errorf("currupted db, file %s can't exist", partCov.Filepath) 567 } 568 return nil 569 }) 570 if err := eg.Wait(); err != nil { 571 return nil, fmt.Errorf("eg.Wait: %w", err) 572 } 573 return res, nil 574 } 575 576 func readIterToChan[K FileCoverageWithLineInfo | FileCoverageWithDetails]( 577 ctx context.Context, iter spannerclient.RowIterator, ch chan<- *K) error { 578 for { 579 row, err := iter.Next() 580 if err == iterator.Done { 581 break 582 } 583 if err != nil { 584 return fmt.Errorf("iter.Next: %w", err) 585 } 586 var r K 587 if err = row.ToStruct(&r); err != nil { 588 return fmt.Errorf("row.ToStruct: %w", err) 589 } 590 select { 591 case ch <- &r: 592 case <-ctx.Done(): 593 return nil 594 } 595 } 596 return nil 597 } 598 599 func IsComparable(fullLines, fullHitCounts, partialLines, partialHitCounts []int64) bool { 600 if len(fullLines) != len(fullHitCounts) || 601 len(partialLines) != len(partialHitCounts) || 602 len(fullLines) < len(partialLines) { 603 return false 604 } 605 fullCov := MakeCovMap(fullLines, fullHitCounts) 606 for iPartial, ln := range partialLines { 607 partialHitCount := partialHitCounts[iPartial] 608 if fullHitCount, fullExist := fullCov[int(ln)]; !fullExist || fullHitCount < partialHitCount { 609 return false 610 } 611 } 612 return true 613 } 614 615 // Returns partial hitcounts that are the only source of the full hitcounts. 616 func UniqCoverage(fullCov, partCov map[int]int64) map[int]int64 { 617 res := maps.Clone(partCov) 618 for ln := range partCov { 619 if partCov[ln] != fullCov[ln] { 620 res[ln] = 0 621 } 622 } 623 return res 624 } 625 626 func RegenerateSubsystems(ctx context.Context, ns string, sss []*subsystem.Subsystem, 627 client spannerclient.SpannerClient) (int, error) { 628 ssMatcher := subsystem.MakePathMatcher(sss) 629 ssCache := make(map[string][]string) 630 filePaths, err := getFilePaths(ctx, ns, client) 631 if err != nil { 632 return 0, err 633 } 634 var mutations []*spanner.Mutation 635 for _, filePath := range filePaths { 636 subsystems := getFileSubsystems(filePath, ssMatcher, ssCache) 637 mutations = append(mutations, fileSubsystemsMutation(ns, filePath, subsystems)) 638 } 639 // There is a limit on the number of mutations per transaction (80k) imposed by the DB. 640 // Expected mutations count is < 20k and looks safe to do w/o batching. 641 if _, err = client.Apply(ctx, mutations); err != nil { 642 return 0, err 643 } 644 return len(mutations), nil 645 } 646 647 func getFilePaths(ctx context.Context, ns string, client spannerclient.SpannerClient) ([]string, error) { 648 iter := client.Single().Query(ctx, spanner.Statement{ 649 SQL: `select filepath from file_subsystems where namespace=$1`, 650 Params: map[string]interface{}{ 651 "p1": ns, 652 }, 653 }) 654 defer iter.Stop() 655 656 var res []string 657 for { 658 row, err := iter.Next() 659 if err == iterator.Done { 660 break 661 } 662 if err != nil { 663 return nil, fmt.Errorf("iter.Next: %w", err) 664 } 665 var r struct { 666 Filepath string 667 } 668 if err = row.ToStruct(&r); err != nil { 669 return nil, fmt.Errorf("row.ToStruct: %w", err) 670 } 671 res = append(res, r.Filepath) 672 } 673 return res, nil 674 }