go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/bisection/throttle/throttle.go (about) 1 // Copyright 2023 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package throttle analysis current running reruns and send task to test failure detector. 16 package throttle 17 18 import ( 19 "context" 20 "time" 21 22 "go.chromium.org/luci/bisection/internal/config" 23 "go.chromium.org/luci/bisection/model" 24 pb "go.chromium.org/luci/bisection/proto/v1" 25 tpb "go.chromium.org/luci/bisection/task/proto" 26 "go.chromium.org/luci/bisection/testfailuredetection" 27 "go.chromium.org/luci/bisection/util" 28 buildbucketpb "go.chromium.org/luci/buildbucket/proto" 29 "go.chromium.org/luci/common/clock" 30 "go.chromium.org/luci/common/errors" 31 "go.chromium.org/luci/common/logging" 32 "go.chromium.org/luci/gae/service/datastore" 33 ) 34 35 const ( 36 // Rerun that is pending for more than 5 minutes should be 37 // considered as congested. 38 congestedPendingThreshold = -time.Minute * 5 39 // Rerun that is older than 7 days should be excluded. 40 // Because there maybe cases that for some reasons 41 // (e.g. crashes) that status may not be updated. 42 // Any reruns more than 7 days are surely canceled by buildbucket, so it is 43 // safe to exclude them. 44 cutoffThreshold = -time.Hour * 7 * 24 45 ) 46 47 func CronHandler(ctx context.Context) error { 48 projectsToProcess, err := config.SupportedProjects(ctx) 49 if err != nil { 50 return errors.Annotate(err, "supported projects").Err() 51 } 52 // TODO(beining@): We should continue to next iteration when there is an error. 53 // Because error in one project should not block other projects. 54 for _, project := range projectsToProcess { 55 count, err := dailyAnalysisCount(ctx, project) 56 if err != nil { 57 return errors.Annotate(err, "daily analysis count").Err() 58 } 59 dailyLimit, err := dailyLimit(ctx, project) 60 if err != nil { 61 return errors.Annotate(err, "daily limit").Err() 62 } 63 if count >= dailyLimit { 64 logging.Warningf(ctx, "%d reached daily limit %d for project %s", count, dailyLimit, project) 65 continue 66 } 67 rerunBuilds, err := congestedCompileReruns(ctx, project) 68 if err != nil { 69 return errors.Annotate(err, "obtain congested compile reruns").Err() 70 } 71 testReruns, err := congestedTestReruns(ctx, project) 72 if err != nil { 73 return errors.Annotate(err, "obtain congested test reruns").Err() 74 } 75 dimensionExcludes := []*pb.Dimension{} 76 for _, d := range allRerunDimensions(rerunBuilds, testReruns) { 77 if dim := util.GetDimensionWithKey(d, "os"); dim != nil { 78 dimensionExcludes = append(dimensionExcludes, dim) 79 } 80 } 81 util.SortDimension(dimensionExcludes) 82 task := &tpb.TestFailureDetectionTask{ 83 Project: project, 84 DimensionExcludes: dimensionExcludes, 85 } 86 if err := testfailuredetection.Schedule(ctx, task); err != nil { 87 return errors.Annotate(err, "schedule test failure detection task").Err() 88 } 89 logging.Infof(ctx, "Test failure detection task scheduled %v", task) 90 } 91 return nil 92 } 93 94 func dailyAnalysisCount(ctx context.Context, project string) (int, error) { 95 cutoffTime := clock.Now(ctx).Add(-time.Hour * 24) 96 q := datastore.NewQuery("TestFailureAnalysis").Eq("project", project).Gt("create_time", cutoffTime) 97 analyses := []*model.TestFailureAnalysis{} 98 err := datastore.GetAll(ctx, q, &analyses) 99 if err != nil { 100 return 0, errors.Annotate(err, "get analyses").Err() 101 } 102 count := 0 103 for _, tfa := range analyses { 104 if tfa.Status != pb.AnalysisStatus_DISABLED && tfa.Status != pb.AnalysisStatus_UNSUPPORTED { 105 count++ 106 } 107 } 108 return count, nil 109 } 110 111 func congestedCompileReruns(ctx context.Context, project string) ([]*model.SingleRerun, error) { 112 cutoffTime := clock.Now(ctx).Add(cutoffThreshold) 113 pendingCutoffTime := clock.Now(ctx).Add(congestedPendingThreshold) 114 q := datastore.NewQuery("CompileRerunBuild"). 115 Eq("status", buildbucketpb.Status_SCHEDULED). 116 Eq("project", project). 117 Gt("create_time", cutoffTime). 118 Lt("create_time", pendingCutoffTime) 119 rerunBuilds := []*model.CompileRerunBuild{} 120 err := datastore.GetAll(ctx, q, &rerunBuilds) 121 if err != nil { 122 return nil, errors.Annotate(err, "get scheduled CompileRerunBuilds").Err() 123 } 124 reruns := []*model.SingleRerun{} 125 for _, r := range rerunBuilds { 126 rerun := []*model.SingleRerun{} 127 q := datastore.NewQuery("SingleRerun").Eq("rerun_build", datastore.KeyForObj(ctx, r)) 128 err := datastore.GetAll(ctx, q, &rerun) 129 if err != nil { 130 return nil, errors.Annotate(err, "get rerun with CompileRerunBuilds ID %d", r.Id).Err() 131 } 132 reruns = append(reruns, rerun...) 133 } 134 return reruns, nil 135 } 136 137 func congestedTestReruns(ctx context.Context, project string) ([]*model.TestSingleRerun, error) { 138 cutoffTime := clock.Now(ctx).Add(cutoffThreshold) 139 pendingCutoffTime := clock.Now(ctx).Add(congestedPendingThreshold) 140 q := datastore.NewQuery("TestSingleRerun"). 141 Eq("luci_build.status", buildbucketpb.Status_SCHEDULED). 142 Eq("luci_build.project", project). 143 Gt("luci_build.create_time", cutoffTime). 144 Lt("luci_build.create_time", pendingCutoffTime) 145 reruns := []*model.TestSingleRerun{} 146 err := datastore.GetAll(ctx, q, &reruns) 147 if err != nil { 148 return nil, errors.Annotate(err, "get scheduled TestSingleRerun").Err() 149 } 150 return reruns, nil 151 } 152 153 func allRerunDimensions(rerunBuilds []*model.SingleRerun, testReruns []*model.TestSingleRerun) []*pb.Dimensions { 154 dims := []*pb.Dimensions{} 155 for _, r := range rerunBuilds { 156 dims = append(dims, r.Dimensions) 157 } 158 for _, r := range testReruns { 159 dims = append(dims, r.Dimensions) 160 } 161 return dims 162 } 163 164 func dailyLimit(ctx context.Context, project string) (int, error) { 165 cfg, err := config.Project(ctx, project) 166 if err != nil { 167 return 0, err 168 } 169 return (int)(cfg.TestAnalysisConfig.GetDailyLimit()), nil 170 }