go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/analysis/rpc/clusters.go (about) 1 // Copyright 2022 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package rpc 16 17 import ( 18 "context" 19 "encoding/hex" 20 "fmt" 21 "time" 22 23 "google.golang.org/protobuf/types/known/timestamppb" 24 25 "go.chromium.org/luci/common/data/stringset" 26 "go.chromium.org/luci/common/errors" 27 "go.chromium.org/luci/common/logging" 28 "go.chromium.org/luci/common/sync/parallel" 29 "go.chromium.org/luci/resultdb/rdbperms" 30 31 "go.chromium.org/luci/analysis/internal/aip" 32 "go.chromium.org/luci/analysis/internal/analysis" 33 "go.chromium.org/luci/analysis/internal/analysis/metrics" 34 "go.chromium.org/luci/analysis/internal/clustering" 35 "go.chromium.org/luci/analysis/internal/clustering/algorithms" 36 "go.chromium.org/luci/analysis/internal/clustering/reclustering" 37 "go.chromium.org/luci/analysis/internal/clustering/rules/cache" 38 "go.chromium.org/luci/analysis/internal/clustering/runs" 39 "go.chromium.org/luci/analysis/internal/config/compiledcfg" 40 "go.chromium.org/luci/analysis/internal/perms" 41 "go.chromium.org/luci/analysis/pbutil" 42 pb "go.chromium.org/luci/analysis/proto/v1" 43 ) 44 45 // MaxClusterRequestSize is the maximum number of test results to cluster in 46 // one call to Cluster(...). 47 const MaxClusterRequestSize = 1000 48 49 // MaxBatchGetClustersRequestSize is the maximum number of clusters to obtain 50 // impact for in one call to BatchGetClusters(). 51 const MaxBatchGetClustersRequestSize = 1000 52 53 type AnalysisClient interface { 54 ReadCluster(ctx context.Context, luciProject string, clusterID clustering.ClusterID) (*analysis.Cluster, error) 55 ReadClusterFailures(ctx context.Context, options analysis.ReadClusterFailuresOptions) (cfs []*analysis.ClusterFailure, err error) 56 ReadClusterExoneratedTestVariants(ctx context.Context, options analysis.ReadClusterExoneratedTestVariantsOptions) (tvs []*analysis.ExoneratedTestVariant, err error) 57 ReadClusterExoneratedTestVariantBranches(ctx context.Context, options analysis.ReadClusterExoneratedTestVariantBranchesOptions) (tvbs []*analysis.ExoneratedTestVariantBranch, err error) 58 ReadClusterHistory(ctx context.Context, options analysis.ReadClusterHistoryOptions) (ret []*analysis.ReadClusterHistoryDay, err error) 59 QueryClusterSummaries(ctx context.Context, luciProject string, options *analysis.QueryClusterSummariesOptions) ([]*analysis.ClusterSummary, error) 60 } 61 62 type clustersServer struct { 63 analysisClient AnalysisClient 64 } 65 66 func NewClustersServer(analysisClient AnalysisClient) *pb.DecoratedClusters { 67 return &pb.DecoratedClusters{ 68 Prelude: checkAllowedPrelude, 69 Service: &clustersServer{analysisClient: analysisClient}, 70 Postlude: gRPCifyAndLogPostlude, 71 } 72 } 73 74 // Cluster clusters a list of test failures. See proto definition for more. 75 func (*clustersServer) Cluster(ctx context.Context, req *pb.ClusterRequest) (*pb.ClusterResponse, error) { 76 if err := pbutil.ValidateProject(req.Project); err != nil { 77 return nil, invalidArgumentError(errors.Annotate(err, "project").Err()) 78 } 79 // We could make an implementation that gracefully degrades if 80 // perms.PermGetRule is not available (i.e. by not returning the 81 // bug associated with a rule cluster), but there is currently no point. 82 // All LUCI Analysis roles currently always grants both permissions 83 // together. 84 if err := perms.VerifyProjectPermissions(ctx, req.Project, perms.PermGetClustersByFailure, perms.PermGetRule); err != nil { 85 return nil, err 86 } 87 88 if len(req.TestResults) > MaxClusterRequestSize { 89 return nil, invalidArgumentError(fmt.Errorf( 90 "too many test results: at most %v test results can be clustered in one request", MaxClusterRequestSize)) 91 } 92 93 failures := make([]*clustering.Failure, 0, len(req.TestResults)) 94 for i, tr := range req.TestResults { 95 if err := validateTestResult(i, tr); err != nil { 96 return nil, err 97 } 98 failures = append(failures, &clustering.Failure{ 99 TestID: tr.TestId, 100 Reason: tr.FailureReason, 101 }) 102 } 103 104 // Fetch a recent project configuration. 105 // (May be a recent value that was cached.) 106 cfg, err := readProjectConfig(ctx, req.Project) 107 if err != nil { 108 return nil, err 109 } 110 111 // Fetch a recent ruleset. 112 ruleset, err := reclustering.Ruleset(ctx, req.Project, cache.StrongRead) 113 if err != nil { 114 return nil, err 115 } 116 117 // Perform clustering from scratch. (Incremental clustering does not make 118 // sense for this RPC.) 119 existing := algorithms.NewEmptyClusterResults(len(req.TestResults)) 120 121 results := algorithms.Cluster(cfg, ruleset, existing, failures) 122 123 // Construct the response proto. 124 clusteredTRs := make([]*pb.ClusterResponse_ClusteredTestResult, 0, len(results.Clusters)) 125 for i, r := range results.Clusters { 126 request := req.TestResults[i] 127 128 entries := make([]*pb.ClusterResponse_ClusteredTestResult_ClusterEntry, 0, len(r)) 129 for _, clusterID := range r { 130 entry := &pb.ClusterResponse_ClusteredTestResult_ClusterEntry{ 131 ClusterId: createClusterIdPB(clusterID), 132 } 133 if clusterID.IsBugCluster() { 134 // For bug clusters, the ID of the cluster is also the ID of 135 // the rule that defines it. Use this property to lookup the 136 // associated rule. 137 ruleID := clusterID.ID 138 rule := ruleset.ActiveRulesByID[ruleID] 139 entry.Bug = createAssociatedBugPB(rule.Rule.BugID, cfg.Config) 140 } 141 entries = append(entries, entry) 142 } 143 clusteredTR := &pb.ClusterResponse_ClusteredTestResult{ 144 RequestTag: request.RequestTag, 145 Clusters: entries, 146 } 147 clusteredTRs = append(clusteredTRs, clusteredTR) 148 } 149 150 version := &pb.ClusteringVersion{ 151 AlgorithmsVersion: int32(results.AlgorithmsVersion), 152 RulesVersion: timestamppb.New(results.RulesVersion), 153 ConfigVersion: timestamppb.New(results.ConfigVersion), 154 } 155 156 return &pb.ClusterResponse{ 157 ClusteredTestResults: clusteredTRs, 158 ClusteringVersion: version, 159 }, nil 160 } 161 162 func validateTestResult(i int, tr *pb.ClusterRequest_TestResult) error { 163 if tr.TestId == "" { 164 return invalidArgumentError(fmt.Errorf("test result %v: test ID must not be empty", i)) 165 } 166 return nil 167 } 168 169 func (c *clustersServer) Get(ctx context.Context, req *pb.GetClusterRequest) (*pb.Cluster, error) { 170 project, clusterID, err := parseClusterName(req.Name) 171 if err != nil { 172 return nil, invalidArgumentError(errors.Annotate(err, "name").Err()) 173 } 174 175 if err := perms.VerifyProjectPermissions(ctx, project, perms.PermGetCluster); err != nil { 176 return nil, err 177 } 178 179 cfg, err := readProjectConfig(ctx, project) 180 if err != nil { 181 return nil, err 182 } 183 184 cluster, err := c.analysisClient.ReadCluster(ctx, project, clusterID) 185 if err != nil { 186 return nil, err 187 } 188 189 readableRealms, err := perms.QueryRealms(ctx, project, nil, rdbperms.PermListTestResults) 190 if err != nil { 191 return nil, err 192 } 193 readableRealmsSet := stringset.NewFromSlice(readableRealms...) 194 195 exists := len(cluster.Realms) > 0 196 result := &pb.Cluster{ 197 Name: req.Name, 198 HasExample: exists, 199 Metrics: make(map[string]*pb.Cluster_TimewiseCounts), 200 } 201 for metricID, metricValue := range cluster.MetricValues { 202 result.Metrics[string(metricID)] = createTimewiseCountsPB(metricValue) 203 } 204 205 if !clusterID.IsBugCluster() && exists { 206 example := &clustering.Failure{ 207 TestID: cluster.ExampleTestID(), 208 Reason: &pb.FailureReason{ 209 PrimaryErrorMessage: cluster.ExampleFailureReason.StringVal, 210 }, 211 } 212 213 // Whether the user has access to at least one test result in the cluster. 214 canSeeAtLeastOneExample := false 215 for _, r := range cluster.Realms { 216 if readableRealmsSet.Has(r) { 217 canSeeAtLeastOneExample = true 218 break 219 } 220 } 221 if canSeeAtLeastOneExample { 222 // While the user has access to at least one test result in the cluster, 223 // they may not have access to the randomly selected example we retrieved 224 // from the cluster_summaries table. Therefore, we must be careful not 225 // to disclose any aspect of this example other than the 226 // clustering key it has in common with all other examples 227 // in the cluster. 228 hasAccessToGivenExample := false 229 result.Title = suggestedClusterTitle(cluster.ClusterID, example, hasAccessToGivenExample, cfg) 230 result.EquivalentFailureAssociationRule = failureAssociationRule(cluster.ClusterID, example, cfg) 231 } 232 } 233 234 return result, nil 235 } 236 237 func createTimewiseCountsPB(counts metrics.TimewiseCounts) *pb.Cluster_TimewiseCounts { 238 return &pb.Cluster_TimewiseCounts{ 239 OneDay: createCountsPB(counts.OneDay), 240 ThreeDay: createCountsPB(counts.ThreeDay), 241 SevenDay: createCountsPB(counts.SevenDay), 242 } 243 } 244 245 func createCountsPB(counts metrics.Counts) *pb.Cluster_Counts { 246 return &pb.Cluster_Counts{Nominal: counts.Nominal} 247 } 248 249 // failureAssociationRule returns the failure association rule for the 250 // given cluster ID, assuming the provided example is still a current 251 // example of the cluster. 252 // It is assumed the user does not have access to the specific test 253 // result represented by exampleFailure, but does have access to at 254 // least one other test result in the cluster. As such, this method 255 // must only return aspects of the test result which are common 256 // to all test results in this cluster. 257 func failureAssociationRule(clusterID clustering.ClusterID, exampleFailure *clustering.Failure, cfg *compiledcfg.ProjectConfig) string { 258 // Ignore error, it is only returned if algorithm cannot be found. 259 alg, _ := algorithms.SuggestingAlgorithm(clusterID.Algorithm) 260 if alg != nil { 261 // Check the example is still in the cluster. Sometimes cluster 262 // examples are stale (e.g. because cluster configuration has 263 // changed and re-clustering is yet to be fully complete and 264 // reflected in the cluster_summaries table). 265 // 266 // If the example is stale, it cannot be used as the basis for 267 // deriving the failure association rule to show to the user. 268 // This is for two reasons: 269 // 1) Functionality. The rule derived from the example 270 // would not be the correct rule for this cluster. 271 // 2) Security. The example failure provided may not be from a realm 272 // the user has access to. As a result of a configuration change, 273 // it may now be in a new cluster. 274 // There is no guarantee the user has access to any test results 275 // in this new cluster, even if it contains some of the test results 276 // of the old cluster, which the user could see some examples of. 277 // The failure association rule for the new cluster is one that the 278 // user may not be allowed to see. 279 exampleClusterID := hex.EncodeToString(alg.Cluster(cfg, exampleFailure)) 280 if exampleClusterID == clusterID.ID { 281 return alg.FailureAssociationRule(cfg, exampleFailure) 282 } 283 } 284 return "" 285 } 286 287 // suggestedClusterTitle returns a human-readable description of the cluster, 288 // using an example failure to help recover the unhashed clustering key. 289 // hasAccessToGivenExample indicates if the user has permission to see the specific 290 // example of the cluster (exampleFailure), or (if false) whether they can 291 // only see one example (but not necessarily exampleFailure). 292 // If it is false, the result of this method will not contain any aspects 293 // of the test result other than the aspects which are common to all other 294 // test results in the cluster (i.e. the clustering key). 295 func suggestedClusterTitle(clusterID clustering.ClusterID, exampleFailure *clustering.Failure, hasAccessToGivenExample bool, cfg *compiledcfg.ProjectConfig) string { 296 // Ignore error, it is only returned if algorithm cannot be found. 297 alg, _ := algorithms.SuggestingAlgorithm(clusterID.Algorithm) 298 if alg != nil { 299 // Check the example is still in the cluster. Sometimes cluster 300 // examples are stale (e.g. because cluster configuration has 301 // changed and re-clustering is yet to be fully complete and 302 // reflected in the cluster_summaries table). 303 // 304 // If the example is stale, it cannot be used as the basis for 305 // deriving the clustering key (cluster definition) to show to 306 // the user. This is for two reasons: 307 // 1) Functionality. The clustering key derived from the example 308 // would not be the correct clustering key for this cluster. 309 // 2) Security. The example failure provided may not be from a realm 310 // the user has access to. As a result of a configuration change, 311 // it may now be in a new cluster. 312 // There is no guarantee the user has access to any test results 313 // in this new cluster, even if it contains some of the test results 314 // of the current cluster, which the user could see some examples of. 315 // The failure association rule for the new cluster is one that the 316 // user may not be allowed to see. 317 exampleClusterID := hex.EncodeToString(alg.Cluster(cfg, exampleFailure)) 318 if exampleClusterID == clusterID.ID { 319 return alg.ClusterTitle(cfg, exampleFailure) 320 } 321 } 322 // Fallback. 323 if hasAccessToGivenExample { 324 // The user has access to the specific test result used as an example. 325 // We are fine to disclose it; we do not have to rely on sanitising it 326 // down to the common clustering key. 327 if clusterID.IsTestNameCluster() { 328 // Fallback for old test name clusters. 329 return exampleFailure.TestID 330 } 331 if clusterID.IsFailureReasonCluster() { 332 // Fallback for old reason-based clusters. 333 return exampleFailure.Reason.PrimaryErrorMessage 334 } 335 } 336 // Fallback for all other cases. 337 return "(definition unavailable due to ongoing reclustering)" 338 } 339 340 func (c *clustersServer) GetReclusteringProgress(ctx context.Context, req *pb.GetReclusteringProgressRequest) (*pb.ReclusteringProgress, error) { 341 project, err := parseReclusteringProgressName(req.Name) 342 if err != nil { 343 return nil, invalidArgumentError(errors.Annotate(err, "name").Err()) 344 } 345 // Getting reclustering progress is considered part of getting a cluster: 346 // whenever you retrieve a cluster, you should be able to tell if the 347 // information you are reading is up to date. 348 if err := perms.VerifyProjectPermissions(ctx, project, perms.PermGetCluster); err != nil { 349 return nil, err 350 } 351 352 progress, err := runs.ReadReclusteringProgress(ctx, project) 353 if err != nil { 354 return nil, err 355 } 356 357 return &pb.ReclusteringProgress{ 358 Name: req.Name, 359 ProgressPerMille: int32(progress.ProgressPerMille), 360 Last: &pb.ClusteringVersion{ 361 AlgorithmsVersion: int32(progress.Last.AlgorithmsVersion), 362 RulesVersion: timestamppb.New(progress.Last.RulesVersion), 363 ConfigVersion: timestamppb.New(progress.Last.ConfigVersion), 364 }, 365 Next: &pb.ClusteringVersion{ 366 AlgorithmsVersion: int32(progress.Next.AlgorithmsVersion), 367 RulesVersion: timestamppb.New(progress.Next.RulesVersion), 368 ConfigVersion: timestamppb.New(progress.Next.ConfigVersion), 369 }, 370 }, nil 371 } 372 373 func (c *clustersServer) QueryClusterSummaries(ctx context.Context, req *pb.QueryClusterSummariesRequest) (*pb.QueryClusterSummariesResponse, error) { 374 if err := pbutil.ValidateProject(req.Project); err != nil { 375 return nil, invalidArgumentError(errors.Annotate(err, "project").Err()) 376 } 377 378 if err := pbutil.ValidateTimeRange(ctx, req.TimeRange); err != nil { 379 err = errors.Annotate(err, "time_range").Err() 380 return nil, invalidArgumentError(err) 381 } 382 383 // TODO(b/239768873): Provide some sort of fallback for users who do not 384 // have permission to run expensive queries if no filters are applied. 385 386 // We could make an implementation that gracefully deals with the situation 387 // where the user does not have perms.PermGetRule, but there is currently 388 // no point as the LUCI Analysis reader role currently always grants 389 // PermGetRule with PermListClusters. 390 if err := perms.VerifyProjectPermissions(ctx, req.Project, perms.PermListClusters, perms.PermGetRule); err != nil { 391 return nil, err 392 } 393 canSeeRuleDefinition, err := perms.HasProjectPermission(ctx, req.Project, perms.PermGetRuleDefinition) 394 if err != nil { 395 return nil, err 396 } 397 398 // Fetch a recent project configuration. 399 // (May be a recent value that was cached.) 400 cfg, err := readProjectConfig(ctx, req.Project) 401 if err != nil { 402 return nil, err 403 } 404 405 view := req.View 406 if view == pb.ClusterSummaryView_CLUSTER_SUMMARY_VIEW_UNSPECIFIED { 407 view = pb.ClusterSummaryView_BASIC 408 } 409 var includeMetricBreakdown = view == pb.ClusterSummaryView_FULL 410 411 var ruleset *cache.Ruleset 412 var clusters []*analysis.ClusterSummary 413 var bqErr error 414 // Parallelise call to Biquery (slow call) 415 // with the datastore/spanner calls to reduce the critical path. 416 err = parallel.FanOutIn(func(ch chan<- func() error) { 417 ch <- func() error { 418 start := time.Now() 419 var err error 420 421 // Fetch a recent ruleset. 422 ruleset, err = reclustering.Ruleset(ctx, req.Project, cache.StrongRead) 423 if err != nil { 424 return err 425 } 426 logging.Infof(ctx, "QueryClusterSummaries: Ruleset part took %v", time.Since(start)) 427 return nil 428 } 429 ch <- func() error { 430 start := time.Now() 431 // To avoid the error returned from the service being non-deterministic 432 // if both goroutines error, populate any error encountered here 433 // into bqErr and return no error. 434 opts := &analysis.QueryClusterSummariesOptions{ 435 TimeRange: req.TimeRange, 436 IncludeMetricBreakdown: includeMetricBreakdown, 437 } 438 var err error 439 440 opts.FailureFilter, err = aip.ParseFilter(req.FailureFilter) 441 if err != nil { 442 bqErr = invalidArgumentError(errors.Annotate(err, "failure_filter").Err()) 443 return nil 444 } 445 opts.OrderBy, err = aip.ParseOrderBy(req.OrderBy) 446 if err != nil { 447 bqErr = invalidArgumentError(errors.Annotate(err, "order_by").Err()) 448 return nil 449 } 450 opts.Metrics, err = metricsByName(req.Project, cfg, req.Metrics) 451 if err != nil { 452 bqErr = invalidArgumentError(errors.Annotate(err, "metrics").Err()) 453 return nil 454 } 455 opts.Realms, err = perms.QueryRealmsNonEmpty(ctx, req.Project, nil, perms.ListTestResultsAndExonerations...) 456 if err != nil { 457 bqErr = err 458 return nil 459 } 460 461 clusters, err = c.analysisClient.QueryClusterSummaries(ctx, req.Project, opts) 462 if err != nil { 463 if analysis.InvalidArgumentTag.In(err) { 464 bqErr = invalidArgumentError(err) 465 return nil 466 } 467 bqErr = errors.Annotate(err, "query clusters for failures").Err() 468 return nil 469 } 470 logging.Infof(ctx, "QueryClusterSummaries: BigQuery part took %v", time.Since(start)) 471 return nil 472 } 473 }) 474 if err != nil { 475 return nil, err 476 } 477 // To avoid the error returned from the service being non-deterministic 478 // if both goroutines error, return error from bigQuery part after any other errors. 479 if bqErr != nil { 480 return nil, bqErr 481 } 482 483 result := []*pb.ClusterSummary{} 484 for _, c := range clusters { 485 cs := &pb.ClusterSummary{ 486 ClusterId: createClusterIdPB(c.ClusterID), 487 Metrics: make(map[string]*pb.ClusterSummary_MetricValue), 488 } 489 for id, metricValue := range c.MetricValues { 490 cs.Metrics[string(id)] = &pb.ClusterSummary_MetricValue{ 491 Value: metricValue.Value, 492 DailyBreakdown: metricValue.DailyBreakdown, 493 } 494 } 495 496 if c.ClusterID.IsBugCluster() { 497 ruleID := c.ClusterID.ID 498 rule := ruleset.ActiveRulesByID[ruleID] 499 if rule != nil { 500 cs.Bug = createAssociatedBugPB(rule.Rule.BugID, cfg.Config) 501 if canSeeRuleDefinition { 502 cs.Title = rule.Rule.RuleDefinition 503 } else { 504 // Because the query is limited to running over the test 505 // failures the user has access to, they have permission 506 // to see the example Test ID for the cluster. 507 508 // Attempt to provide a description of the failures matched 509 // by the rule from the data the user can see, without 510 // revealing the content of the rule itself. 511 cs.Title = fmt.Sprintf("Selected failures in %s", c.ExampleTestID) 512 if c.UniqueTestIDs > 1 { 513 cs.Title += fmt.Sprintf(" (and %v more)", c.UniqueTestIDs-1) 514 } 515 } 516 } else { 517 // Rule is inactive / in process of being archived. 518 cs.Title = "(rule archived)" 519 } 520 } else { 521 example := &clustering.Failure{ 522 TestID: c.ExampleTestID, 523 Reason: &pb.FailureReason{ 524 PrimaryErrorMessage: c.ExampleFailureReason.StringVal, 525 }, 526 } 527 // Because QueryClusterSummaries only reads failures the user has 528 // access to, the example is one the user has access to, and 529 // so we can use it for the title. 530 hasAccessToGivenExample := true 531 cs.Title = suggestedClusterTitle(c.ClusterID, example, hasAccessToGivenExample, cfg) 532 } 533 534 result = append(result, cs) 535 } 536 return &pb.QueryClusterSummariesResponse{ClusterSummaries: result}, nil 537 } 538 539 // metricsByName retrieves the metrics with the given name from a 540 // given LUCI Project and configuration. If the metric is not 541 // from the given LUCI Project, an error will be returned. 542 func metricByName(project string, cfg *compiledcfg.ProjectConfig, name string) (metrics.Definition, error) { 543 metricProject, id, err := parseProjectMetricName(name) 544 if err != nil { 545 return metrics.Definition{}, err 546 } 547 if metricProject != project { 548 return metrics.Definition{}, errors.Reason("metric %s cannot be used as it is from a different LUCI Project", name).Err() 549 } 550 metric, err := metrics.ByID(id) 551 if err != nil { 552 return metrics.Definition{}, err 553 } 554 return metric.AdaptToProject(project, cfg.Config.Metrics), nil 555 } 556 557 // metricsByName retrieves the metrics with the given names from a 558 // given LUCI Project and configuration. If the metrics are not 559 // from the given LUCI Project, an error will be returned. 560 func metricsByName(project string, cfg *compiledcfg.ProjectConfig, names []string) ([]metrics.Definition, error) { 561 results := make([]metrics.Definition, 0, len(names)) 562 for _, name := range names { 563 metric, err := metricByName(project, cfg, name) 564 if err != nil { 565 return nil, err 566 } 567 results = append(results, metric) 568 } 569 return results, nil 570 } 571 572 func (c *clustersServer) QueryClusterFailures(ctx context.Context, req *pb.QueryClusterFailuresRequest) (*pb.QueryClusterFailuresResponse, error) { 573 project, clusterID, err := parseClusterFailuresName(req.Parent) 574 if err != nil { 575 return nil, invalidArgumentError(errors.Annotate(err, "parent").Err()) 576 } 577 578 if err := perms.VerifyProjectPermissions(ctx, project, perms.PermGetCluster); err != nil { 579 return nil, err 580 } 581 582 // Fetch a recent project configuration. 583 // (May be a recent value that was cached.) 584 cfg, err := readProjectConfig(ctx, project) 585 if err != nil { 586 return nil, err 587 } 588 589 opts := analysis.ReadClusterFailuresOptions{ 590 Project: project, 591 ClusterID: clusterID, 592 } 593 opts.Realms, err = perms.QueryRealmsNonEmpty(ctx, project, nil, perms.ListTestResultsAndExonerations...) 594 if err != nil { 595 // If the user has permission in no realms, QueryRealmsNonEmpty 596 // will return an appstatus error PERMISSION_DENIED. 597 // Otherwise, e.g. in case AuthDB was unavailable, the error will 598 // not be an appstatus error and the client will get an internal 599 // server error. 600 return nil, err 601 } 602 if req.MetricFilter != "" { 603 metric, err := metricByName(project, cfg, req.MetricFilter) 604 if err != nil { 605 return nil, invalidArgumentError(errors.Annotate(err, "filter_metric").Err()) 606 } 607 opts.MetricFilter = &metric 608 } 609 610 failures, err := c.analysisClient.ReadClusterFailures(ctx, opts) 611 if err != nil { 612 return nil, errors.Annotate(err, "query cluster failures").Err() 613 } 614 response := &pb.QueryClusterFailuresResponse{} 615 for _, f := range failures { 616 response.Failures = append(response.Failures, createDistinctClusterFailurePB(f)) 617 } 618 619 return response, nil 620 } 621 622 func createDistinctClusterFailurePB(f *analysis.ClusterFailure) *pb.DistinctClusterFailure { 623 var exonerations []*pb.DistinctClusterFailure_Exoneration 624 for _, ex := range f.Exonerations { 625 exonerations = append(exonerations, &pb.DistinctClusterFailure_Exoneration{ 626 Reason: analysis.FromBQExonerationReason(ex.Reason.StringVal), 627 }) 628 } 629 630 var changelists []*pb.Changelist 631 for _, cl := range f.Changelists { 632 changelists = append(changelists, &pb.Changelist{ 633 Host: cl.Host.StringVal, 634 Change: cl.Change.Int64, 635 Patchset: int32(cl.Patchset.Int64), 636 }) 637 } 638 639 buildStatus := analysis.FromBQBuildStatus(f.BuildStatus.StringVal) 640 641 var presubmitRun *pb.DistinctClusterFailure_PresubmitRun 642 if f.PresubmitRunID != nil { 643 presubmitRun = &pb.DistinctClusterFailure_PresubmitRun{ 644 PresubmitRunId: &pb.PresubmitRunId{ 645 System: f.PresubmitRunID.System.StringVal, 646 Id: f.PresubmitRunID.ID.StringVal, 647 }, 648 Owner: f.PresubmitRunOwner.StringVal, 649 Mode: analysis.FromBQPresubmitRunMode(f.PresubmitRunMode.StringVal), 650 Status: analysis.FromBQPresubmitRunStatus(f.PresubmitRunStatus.StringVal), 651 } 652 } 653 654 return &pb.DistinctClusterFailure{ 655 TestId: f.TestID.StringVal, 656 Variant: createVariantPB(f.Variant), 657 PartitionTime: timestamppb.New(f.PartitionTime.Timestamp), 658 PresubmitRun: presubmitRun, 659 IsBuildCritical: f.IsBuildCritical.Bool, 660 Exonerations: exonerations, 661 BuildStatus: buildStatus, 662 IngestedInvocationId: f.IngestedInvocationID.StringVal, 663 IsIngestedInvocationBlocked: f.IsIngestedInvocationBlocked.Bool, 664 Changelists: changelists, 665 Count: f.Count, 666 } 667 } 668 669 func createVariantPB(variant []*analysis.Variant) *pb.Variant { 670 def := make(map[string]string) 671 for _, v := range variant { 672 def[v.Key.StringVal] = v.Value.StringVal 673 } 674 var result *pb.Variant 675 if len(def) > 0 { 676 result = &pb.Variant{Def: def} 677 } 678 return result 679 } 680 681 func (c *clustersServer) QueryExoneratedTestVariants(ctx context.Context, req *pb.QueryClusterExoneratedTestVariantsRequest) (*pb.QueryClusterExoneratedTestVariantsResponse, error) { 682 project, clusterID, err := parseClusterExoneratedTestVariantsName(req.Parent) 683 if err != nil { 684 return nil, invalidArgumentError(errors.Annotate(err, "parent").Err()) 685 } 686 687 if err := perms.VerifyProjectPermissions(ctx, project, perms.PermGetCluster); err != nil { 688 return nil, err 689 } 690 opts := analysis.ReadClusterExoneratedTestVariantsOptions{ 691 Project: project, 692 ClusterID: clusterID, 693 } 694 opts.Realms, err = perms.QueryRealmsNonEmpty(ctx, project, nil, perms.ListTestResultsAndExonerations...) 695 if err != nil { 696 // If the user has permission in no realms, QueryRealmsNonEmpty 697 // will return an appstatus error PERMISSION_DENIED. 698 // Otherwise, e.g. in case AuthDB was unavailable, the error will 699 // not be an appstatus error and the client will get an internal 700 // server error. 701 return nil, err 702 } 703 704 testVariants, err := c.analysisClient.ReadClusterExoneratedTestVariants(ctx, opts) 705 if err != nil { 706 return nil, errors.Annotate(err, "query exonerated test variants").Err() 707 } 708 response := &pb.QueryClusterExoneratedTestVariantsResponse{} 709 for _, f := range testVariants { 710 response.TestVariants = append(response.TestVariants, createClusterExoneratedTestVariant(f)) 711 } 712 713 return response, nil 714 } 715 716 func createClusterExoneratedTestVariant(tv *analysis.ExoneratedTestVariant) *pb.ClusterExoneratedTestVariant { 717 return &pb.ClusterExoneratedTestVariant{ 718 TestId: tv.TestID.StringVal, 719 Variant: createVariantPB(tv.Variant), 720 CriticalFailuresExonerated: tv.CriticalFailuresExonerated, 721 LastExoneration: timestamppb.New(tv.LastExoneration.Timestamp), 722 } 723 } 724 725 func (c *clustersServer) QueryExoneratedTestVariantBranches(ctx context.Context, req *pb.QueryClusterExoneratedTestVariantBranchesRequest) (*pb.QueryClusterExoneratedTestVariantBranchesResponse, error) { 726 project, clusterID, err := parseClusterExoneratedTestVariantBranchesName(req.Parent) 727 if err != nil { 728 return nil, invalidArgumentError(errors.Annotate(err, "parent").Err()) 729 } 730 731 if err := perms.VerifyProjectPermissions(ctx, project, perms.PermGetCluster); err != nil { 732 return nil, err 733 } 734 opts := analysis.ReadClusterExoneratedTestVariantBranchesOptions{ 735 Project: project, 736 ClusterID: clusterID, 737 } 738 opts.Realms, err = perms.QueryRealmsNonEmpty(ctx, project, nil, perms.ListTestResultsAndExonerations...) 739 if err != nil { 740 // If the user has permission in no realms, QueryRealmsNonEmpty 741 // will return an appstatus error PERMISSION_DENIED. 742 // Otherwise, e.g. in case AuthDB was unavailable, the error will 743 // not be an appstatus error and the client will get an internal 744 // server error. 745 return nil, err 746 } 747 748 testVariantBranches, err := c.analysisClient.ReadClusterExoneratedTestVariantBranches(ctx, opts) 749 if err != nil { 750 return nil, errors.Annotate(err, "query exonerated test variant branches").Err() 751 } 752 response := &pb.QueryClusterExoneratedTestVariantBranchesResponse{} 753 for _, tvb := range testVariantBranches { 754 response.TestVariantBranches = append(response.TestVariantBranches, createClusterExoneratedTestVariantBranch(tvb)) 755 } 756 757 return response, nil 758 } 759 760 func createClusterExoneratedTestVariantBranch(tv *analysis.ExoneratedTestVariantBranch) *pb.ClusterExoneratedTestVariantBranch { 761 return &pb.ClusterExoneratedTestVariantBranch{ 762 Project: tv.Project.StringVal, 763 TestId: tv.TestID.StringVal, 764 Variant: createVariantPB(tv.Variant), 765 SourceRef: createSourceRef(tv.SourceRef), 766 CriticalFailuresExonerated: tv.CriticalFailuresExonerated, 767 LastExoneration: timestamppb.New(tv.LastExoneration.Timestamp), 768 } 769 } 770 771 func createSourceRef(sourceRef analysis.SourceRef) *pb.SourceRef { 772 result := &pb.SourceRef{} 773 if sourceRef.Gitiles != nil { 774 result.System = &pb.SourceRef_Gitiles{ 775 Gitiles: &pb.GitilesRef{ 776 Host: sourceRef.Gitiles.Host.StringVal, 777 Project: sourceRef.Gitiles.Project.StringVal, 778 Ref: sourceRef.Gitiles.Ref.StringVal, 779 }, 780 } 781 } 782 return result 783 } 784 785 // QueryHistory clusters a list of test failures. See proto definition for more. 786 func (c *clustersServer) QueryHistory(ctx context.Context, req *pb.QueryClusterHistoryRequest) (*pb.QueryClusterHistoryResponse, error) { 787 if err := pbutil.ValidateProject(req.Project); err != nil { 788 return nil, invalidArgumentError(errors.Annotate(err, "project").Err()) 789 } 790 791 if err := perms.VerifyProjectPermissions(ctx, req.Project, perms.PermGetConfig); err != nil { 792 return nil, err 793 } 794 795 cfg, err := readProjectConfig(ctx, req.Project) 796 if err != nil { 797 return nil, err 798 } 799 800 opts := analysis.ReadClusterHistoryOptions{ 801 Project: req.Project, 802 Days: req.Days, 803 } 804 805 opts.FailureFilter, err = aip.ParseFilter(req.FailureFilter) 806 if err != nil { 807 return nil, invalidArgumentError(errors.Annotate(err, "failure_filter").Err()) 808 } 809 810 opts.Metrics, err = metricsByName(req.Project, cfg, req.Metrics) 811 if err != nil { 812 return nil, invalidArgumentError(err) 813 } 814 815 realms, err := perms.QueryRealmsNonEmpty(ctx, req.Project, nil, perms.ListTestResultsAndExonerations...) 816 if err != nil { 817 // If the user has permission in no realms, QueryRealmsNonEmpty 818 // will return an appstatus error PERMISSION_DENIED. 819 // Otherwise, e.g. in case AuthDB was unavailable, the error will 820 // not be an appstatus error and the client will get an internal 821 // server error. 822 return nil, err 823 } 824 opts.Realms = realms 825 826 days, err := c.analysisClient.ReadClusterHistory(ctx, opts) 827 if err != nil { 828 return nil, errors.Annotate(err, "cluster history").Err() 829 } 830 831 response := &pb.QueryClusterHistoryResponse{} 832 if len(days) == 0 { 833 return response, nil 834 } 835 836 for _, day := range days { 837 metrics := make(map[string]int32) 838 for id, value := range day.MetricValues { 839 metrics[id.String()] = value 840 } 841 response.Days = append(response.Days, &pb.ClusterHistoryDay{ 842 Metrics: metrics, 843 Date: day.Date.Format("2006-01-02"), 844 }) 845 } 846 return response, nil 847 }