github.com/zppinho/prow@v0.0.0-20240510014325-1738badeb017/pkg/crier/controller.go (about)

     1  /*
     2  Copyright 2018 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  // Package crier reports finished prowjob status to git providers.
    18  package crier
    19  
    20  import (
    21  	"context"
    22  	"fmt"
    23  	"time"
    24  
    25  	"github.com/sirupsen/logrus"
    26  	"k8s.io/apimachinery/pkg/api/errors"
    27  	"k8s.io/client-go/util/workqueue"
    28  	"sigs.k8s.io/controller-runtime/pkg/builder"
    29  	ctrlruntimeclient "sigs.k8s.io/controller-runtime/pkg/client"
    30  	"sigs.k8s.io/controller-runtime/pkg/controller"
    31  	"sigs.k8s.io/controller-runtime/pkg/manager"
    32  	"sigs.k8s.io/controller-runtime/pkg/reconcile"
    33  
    34  	prowv1 "sigs.k8s.io/prow/pkg/apis/prowjobs/v1"
    35  	"sigs.k8s.io/prow/pkg/crier/reporters/criercommonlib"
    36  )
    37  
    38  type ReportClient interface {
    39  	// Report reports a Prowjob. The provided logger is already populated with the
    40  	// prowjob name and the reporter name.
    41  	// If a reporter wants to defer reporting, it can return a reconcile.Result with a RequeueAfter
    42  	Report(ctx context.Context, log *logrus.Entry, pj *prowv1.ProwJob) ([]*prowv1.ProwJob, *reconcile.Result, error)
    43  	GetName() string
    44  	// ShouldReport determines if a ProwJob should be reported. The provided logger
    45  	// is already populated with the prowjob name and the reporter name.
    46  	ShouldReport(ctx context.Context, log *logrus.Entry, pj *prowv1.ProwJob) bool
    47  }
    48  
    49  // reconciler struct defines how a controller should encapsulate
    50  // logging, client connectivity, informing (list and watching)
    51  // queueing, and handling of resource changes
    52  type reconciler struct {
    53  	pjclientset       ctrlruntimeclient.Client
    54  	reporter          ReportClient
    55  	enablementChecker func(org, repo string) bool
    56  }
    57  
    58  // New constructs a new instance of the crier reconciler.
    59  func New(
    60  	mgr manager.Manager,
    61  	reporter ReportClient,
    62  	numWorkers int,
    63  	enablementChecker func(org, repo string) bool,
    64  ) error {
    65  	if err := builder.
    66  		ControllerManagedBy(mgr).
    67  		// Is used for metrics, hence must be unique per controller instance
    68  		Named(fmt.Sprintf("crier_%s", reporter.GetName())).
    69  		For(&prowv1.ProwJob{}).
    70  		WithOptions(controller.Options{MaxConcurrentReconciles: numWorkers,
    71  			RateLimiter: workqueue.DefaultControllerRateLimiter()}).
    72  		Complete(&reconciler{
    73  			pjclientset:       mgr.GetClient(),
    74  			reporter:          reporter,
    75  			enablementChecker: enablementChecker,
    76  		}); err != nil {
    77  		return fmt.Errorf("failed to construct controller: %w", err)
    78  	}
    79  
    80  	return nil
    81  }
    82  
    83  // Reconcile retrieves each queued item and takes the necessary handler action based off of if
    84  // the item was created or deleted.
    85  func (r *reconciler) Reconcile(ctx context.Context, req reconcile.Request) (reconcile.Result, error) {
    86  	log := logrus.WithField("reporter", r.reporter.GetName()).WithField("key", req.String()).WithField("prowjob", req.Name)
    87  	log.Debug("processing next key")
    88  	result, err := r.reconcile(ctx, log, req)
    89  	if err != nil {
    90  		if criercommonlib.IsUserError(err) {
    91  			log.WithError(err).Debug("Reconciliation failed")
    92  		} else {
    93  			log.WithError(err).Error("Reconciliation failed")
    94  		}
    95  	}
    96  	if result == nil {
    97  		result = &reconcile.Result{}
    98  	}
    99  	return *result, err
   100  }
   101  
   102  func (r *reconciler) reconcile(ctx context.Context, log *logrus.Entry, req reconcile.Request) (*reconcile.Result, error) {
   103  	// Limit reconciliation time to 30 minutes. This should more than enough time
   104  	// for any reasonable reporter. Most reporters should set a stricter timeout
   105  	// themselves. This mainly helps avoid leaking reconciliation threads that
   106  	// will never complete.
   107  	ctx, cancel := context.WithTimeout(ctx, 30*time.Minute)
   108  	defer cancel()
   109  	var pj prowv1.ProwJob
   110  	if err := r.pjclientset.Get(ctx, req.NamespacedName, &pj); err != nil {
   111  		if errors.IsNotFound(err) {
   112  			log.Debug("object no longer exist")
   113  			return nil, nil
   114  		}
   115  
   116  		return nil, fmt.Errorf("failed to get prowjob %s: %w", req.String(), err)
   117  	}
   118  
   119  	if !r.shouldHandle(&pj) {
   120  		return nil, nil
   121  	}
   122  
   123  	log = log.WithField("jobName", pj.Spec.Job)
   124  
   125  	if !r.reporter.ShouldReport(ctx, log, &pj) {
   126  		return nil, nil
   127  	}
   128  
   129  	// we set omitempty on PrevReportStates, so here we need to init it if is nil
   130  	if pj.Status.PrevReportStates == nil {
   131  		pj.Status.PrevReportStates = map[string]prowv1.ProwJobState{}
   132  	}
   133  
   134  	// already reported current state
   135  	if pj.Status.PrevReportStates[r.reporter.GetName()] == pj.Status.State {
   136  		log.Trace("Already reported")
   137  		return nil, nil
   138  	}
   139  
   140  	log = log.WithField("jobStatus", pj.Status.State)
   141  	log.Info("Will report state")
   142  	pjs, requeue, err := r.reporter.Report(ctx, log, &pj)
   143  	if err != nil {
   144  		if criercommonlib.IsUserError(err) {
   145  			log.WithError(err).Debug("Failed to report job.")
   146  		} else {
   147  			log.WithError(err).Error("Failed to report job.")
   148  		}
   149  		crierMetrics.reportingResults.WithLabelValues(r.reporter.GetName(), ResultError).Inc()
   150  		return nil, fmt.Errorf("failed to report job: %w", err)
   151  	}
   152  	if requeue != nil {
   153  		return requeue, nil
   154  	}
   155  
   156  	crierMetrics.reportingResults.WithLabelValues(r.reporter.GetName(), ResultSuccess).Inc()
   157  	log.WithField("job-count", len(pjs)).Info("Reported job(s), now will update pj(s).")
   158  	var lastErr error
   159  	for _, pjob := range pjs {
   160  		if err := criercommonlib.UpdateReportStateWithRetries(ctx, pjob, log, r.pjclientset, r.reporter.GetName()); err != nil {
   161  			log.WithError(err).Error("Failed to update report state on prowjob")
   162  			// The error above is alreay logged, so it would be duplicated
   163  			// effort to combine all errors to return, only capture the last
   164  			// error should be sufficient.
   165  			lastErr = err
   166  		}
   167  	}
   168  
   169  	if pj.Status.CompletionTime != nil {
   170  		latency := time.Now().Unix() - pj.Status.CompletionTime.Unix()
   171  		crierMetrics.latency.WithLabelValues(r.reporter.GetName()).Observe(float64(latency))
   172  		log.WithField("latency", latency).Debug("Report latency.")
   173  	}
   174  
   175  	return nil, lastErr
   176  }
   177  
   178  func (r *reconciler) shouldHandle(pj *prowv1.ProwJob) bool {
   179  	refs := pj.Spec.ExtraRefs
   180  	if pj.Spec.Refs != nil {
   181  		refs = append(refs, *pj.Spec.Refs)
   182  	}
   183  	if len(refs) == 0 {
   184  		return true
   185  	}
   186  
   187  	// It is possible to have conflicting settings here, we choose
   188  	// to report if in doubt because reporting multiple times is
   189  	// better than not reporting at all.
   190  	var enabled bool
   191  	for _, ref := range refs {
   192  		if r.enablementChecker(ref.Org, ref.Repo) {
   193  			enabled = true
   194  			break
   195  		}
   196  	}
   197  
   198  	return enabled
   199  }