sigs.k8s.io/prow@v0.0.0-20240503223140-c5e374dc7eb1/pkg/crier/controller.go (about) 1 /* 2 Copyright 2018 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 // Package crier reports finished prowjob status to git providers. 18 package crier 19 20 import ( 21 "context" 22 "fmt" 23 "time" 24 25 "github.com/sirupsen/logrus" 26 "k8s.io/apimachinery/pkg/api/errors" 27 "k8s.io/client-go/util/workqueue" 28 "sigs.k8s.io/controller-runtime/pkg/builder" 29 ctrlruntimeclient "sigs.k8s.io/controller-runtime/pkg/client" 30 "sigs.k8s.io/controller-runtime/pkg/controller" 31 "sigs.k8s.io/controller-runtime/pkg/manager" 32 "sigs.k8s.io/controller-runtime/pkg/reconcile" 33 34 prowv1 "sigs.k8s.io/prow/pkg/apis/prowjobs/v1" 35 "sigs.k8s.io/prow/pkg/crier/reporters/criercommonlib" 36 ) 37 38 type ReportClient interface { 39 // Report reports a Prowjob. The provided logger is already populated with the 40 // prowjob name and the reporter name. 41 // If a reporter wants to defer reporting, it can return a reconcile.Result with a RequeueAfter 42 Report(ctx context.Context, log *logrus.Entry, pj *prowv1.ProwJob) ([]*prowv1.ProwJob, *reconcile.Result, error) 43 GetName() string 44 // ShouldReport determines if a ProwJob should be reported. The provided logger 45 // is already populated with the prowjob name and the reporter name. 46 ShouldReport(ctx context.Context, log *logrus.Entry, pj *prowv1.ProwJob) bool 47 } 48 49 // reconciler struct defines how a controller should encapsulate 50 // logging, client connectivity, informing (list and watching) 51 // queueing, and handling of resource changes 52 type reconciler struct { 53 pjclientset ctrlruntimeclient.Client 54 reporter ReportClient 55 enablementChecker func(org, repo string) bool 56 } 57 58 // New constructs a new instance of the crier reconciler. 59 func New( 60 mgr manager.Manager, 61 reporter ReportClient, 62 numWorkers int, 63 enablementChecker func(org, repo string) bool, 64 ) error { 65 if err := builder. 66 ControllerManagedBy(mgr). 67 // Is used for metrics, hence must be unique per controller instance 68 Named(fmt.Sprintf("crier_%s", reporter.GetName())). 69 For(&prowv1.ProwJob{}). 70 WithOptions(controller.Options{MaxConcurrentReconciles: numWorkers, 71 RateLimiter: workqueue.DefaultControllerRateLimiter()}). 72 Complete(&reconciler{ 73 pjclientset: mgr.GetClient(), 74 reporter: reporter, 75 enablementChecker: enablementChecker, 76 }); err != nil { 77 return fmt.Errorf("failed to construct controller: %w", err) 78 } 79 80 return nil 81 } 82 83 // Reconcile retrieves each queued item and takes the necessary handler action based off of if 84 // the item was created or deleted. 85 func (r *reconciler) Reconcile(ctx context.Context, req reconcile.Request) (reconcile.Result, error) { 86 log := logrus.WithField("reporter", r.reporter.GetName()).WithField("key", req.String()).WithField("prowjob", req.Name) 87 log.Debug("processing next key") 88 result, err := r.reconcile(ctx, log, req) 89 if err != nil { 90 if criercommonlib.IsUserError(err) { 91 log.WithError(err).Debug("Reconciliation failed") 92 } else { 93 log.WithError(err).Error("Reconciliation failed") 94 } 95 } 96 if result == nil { 97 result = &reconcile.Result{} 98 } 99 return *result, err 100 } 101 102 func (r *reconciler) reconcile(ctx context.Context, log *logrus.Entry, req reconcile.Request) (*reconcile.Result, error) { 103 // Limit reconciliation time to 30 minutes. This should more than enough time 104 // for any reasonable reporter. Most reporters should set a stricter timeout 105 // themselves. This mainly helps avoid leaking reconciliation threads that 106 // will never complete. 107 ctx, cancel := context.WithTimeout(ctx, 30*time.Minute) 108 defer cancel() 109 var pj prowv1.ProwJob 110 if err := r.pjclientset.Get(ctx, req.NamespacedName, &pj); err != nil { 111 if errors.IsNotFound(err) { 112 log.Debug("object no longer exist") 113 return nil, nil 114 } 115 116 return nil, fmt.Errorf("failed to get prowjob %s: %w", req.String(), err) 117 } 118 119 if !r.shouldHandle(&pj) { 120 return nil, nil 121 } 122 123 log = log.WithField("jobName", pj.Spec.Job) 124 125 if !r.reporter.ShouldReport(ctx, log, &pj) { 126 return nil, nil 127 } 128 129 // we set omitempty on PrevReportStates, so here we need to init it if is nil 130 if pj.Status.PrevReportStates == nil { 131 pj.Status.PrevReportStates = map[string]prowv1.ProwJobState{} 132 } 133 134 // already reported current state 135 if pj.Status.PrevReportStates[r.reporter.GetName()] == pj.Status.State { 136 log.Trace("Already reported") 137 return nil, nil 138 } 139 140 log = log.WithField("jobStatus", pj.Status.State) 141 log.Info("Will report state") 142 pjs, requeue, err := r.reporter.Report(ctx, log, &pj) 143 if err != nil { 144 if criercommonlib.IsUserError(err) { 145 log.WithError(err).Debug("Failed to report job.") 146 } else { 147 log.WithError(err).Error("Failed to report job.") 148 } 149 crierMetrics.reportingResults.WithLabelValues(r.reporter.GetName(), ResultError).Inc() 150 return nil, fmt.Errorf("failed to report job: %w", err) 151 } 152 if requeue != nil { 153 return requeue, nil 154 } 155 156 crierMetrics.reportingResults.WithLabelValues(r.reporter.GetName(), ResultSuccess).Inc() 157 log.WithField("job-count", len(pjs)).Info("Reported job(s), now will update pj(s).") 158 var lastErr error 159 for _, pjob := range pjs { 160 if err := criercommonlib.UpdateReportStateWithRetries(ctx, pjob, log, r.pjclientset, r.reporter.GetName()); err != nil { 161 log.WithError(err).Error("Failed to update report state on prowjob") 162 // The error above is alreay logged, so it would be duplicated 163 // effort to combine all errors to return, only capture the last 164 // error should be sufficient. 165 lastErr = err 166 } 167 } 168 169 if pj.Status.CompletionTime != nil { 170 latency := time.Now().Unix() - pj.Status.CompletionTime.Unix() 171 crierMetrics.latency.WithLabelValues(r.reporter.GetName()).Observe(float64(latency)) 172 log.WithField("latency", latency).Debug("Report latency.") 173 } 174 175 return nil, lastErr 176 } 177 178 func (r *reconciler) shouldHandle(pj *prowv1.ProwJob) bool { 179 refs := pj.Spec.ExtraRefs 180 if pj.Spec.Refs != nil { 181 refs = append(refs, *pj.Spec.Refs) 182 } 183 if len(refs) == 0 { 184 return true 185 } 186 187 // It is possible to have conflicting settings here, we choose 188 // to report if in doubt because reporting multiple times is 189 // better than not reporting at all. 190 var enabled bool 191 for _, ref := range refs { 192 if r.enablementChecker(ref.Org, ref.Repo) { 193 enabled = true 194 break 195 } 196 } 197 198 return enabled 199 }