sigs.k8s.io/prow@v0.0.0-20240503223140-c5e374dc7eb1/cmd/horologium/main.go (about)

     1  /*
     2  Copyright 2017 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package main
    18  
    19  import (
    20  	"context"
    21  	"flag"
    22  	"fmt"
    23  	"os"
    24  	"time"
    25  
    26  	"github.com/sirupsen/logrus"
    27  	"k8s.io/apimachinery/pkg/util/sets"
    28  	ctrlruntimeclient "sigs.k8s.io/controller-runtime/pkg/client"
    29  	"sigs.k8s.io/controller-runtime/pkg/cluster"
    30  
    31  	prowapi "sigs.k8s.io/prow/pkg/apis/prowjobs/v1"
    32  	"sigs.k8s.io/prow/pkg/config"
    33  	"sigs.k8s.io/prow/pkg/cron"
    34  	pkgflagutil "sigs.k8s.io/prow/pkg/flagutil"
    35  	prowflagutil "sigs.k8s.io/prow/pkg/flagutil"
    36  	configflagutil "sigs.k8s.io/prow/pkg/flagutil/config"
    37  	"sigs.k8s.io/prow/pkg/interrupts"
    38  	"sigs.k8s.io/prow/pkg/logrusutil"
    39  	"sigs.k8s.io/prow/pkg/metrics"
    40  	"sigs.k8s.io/prow/pkg/pjutil"
    41  	"sigs.k8s.io/prow/pkg/pjutil/pprof"
    42  )
    43  
    44  const (
    45  	defaultTickInterval = time.Minute
    46  )
    47  
    48  type options struct {
    49  	config configflagutil.ConfigOptions
    50  
    51  	kubernetes             prowflagutil.KubernetesOptions
    52  	instrumentationOptions prowflagutil.InstrumentationOptions
    53  	controllerManager      prowflagutil.ControllerManagerOptions
    54  	dryRun                 bool
    55  }
    56  
    57  func gatherOptions(fs *flag.FlagSet, args ...string) options {
    58  	var o options
    59  
    60  	fs.BoolVar(&o.dryRun, "dry-run", true, "Whether or not to make mutating API calls to Kubernetes.")
    61  	o.config.AddFlags(fs)
    62  	o.kubernetes.AddFlags(fs)
    63  	o.instrumentationOptions.AddFlags(fs)
    64  	o.controllerManager.TimeoutListingProwJobsDefault = 60 * time.Second
    65  	o.controllerManager.AddFlags(fs)
    66  
    67  	fs.Parse(args)
    68  	return o
    69  }
    70  
    71  func (o *options) Validate() error {
    72  	for _, group := range []pkgflagutil.OptionGroup{&o.kubernetes, &o.config, &o.controllerManager} {
    73  		if err := group.Validate(o.dryRun); err != nil {
    74  			return err
    75  		}
    76  	}
    77  
    78  	return nil
    79  }
    80  
    81  func main() {
    82  	logrusutil.ComponentInit()
    83  
    84  	o := gatherOptions(flag.NewFlagSet(os.Args[0], flag.ExitOnError), os.Args[1:]...)
    85  	if err := o.Validate(); err != nil {
    86  		logrus.WithError(err).Fatal("Invalid options")
    87  	}
    88  
    89  	defer interrupts.WaitForGracefulShutdown()
    90  
    91  	pprof.Instrument(o.instrumentationOptions)
    92  
    93  	configAgent, err := o.config.ConfigAgent()
    94  	if err != nil {
    95  		logrus.WithError(err).Fatal("Error starting config agent.")
    96  	}
    97  
    98  	cfg, err := o.kubernetes.InfrastructureClusterConfig(o.dryRun)
    99  	if err != nil {
   100  		logrus.WithError(err).Fatal("Failed to get prowjob kubeconfig")
   101  	}
   102  	cluster, err := cluster.New(cfg, func(o *cluster.Options) { o.Namespace = configAgent.Config().ProwJobNamespace })
   103  	if err != nil {
   104  		logrus.WithError(err).Fatal("Failed to construct prowjob client")
   105  	}
   106  	// Trigger cache creation for ProwJobs so the following cacheSync actually does something. If we don't
   107  	// do this here, the first List request for ProwJobs will transiently trigger cache creation and sync,
   108  	// which doesn't allow us to fail the binary if it doesn't work.
   109  	if _, err := cluster.GetCache().GetInformer(interrupts.Context(), &prowapi.ProwJob{}); err != nil {
   110  		logrus.WithError(err).Fatal("Failed to get a prowjob informer")
   111  	}
   112  	interrupts.Run(func(ctx context.Context) {
   113  		if err := cluster.Start(ctx); err != nil {
   114  			logrus.WithError(err).Fatal("Controller failed to start")
   115  		}
   116  		logrus.Info("Controller finished gracefully.")
   117  	})
   118  	mgrSyncCtx, mgrSyncCtxCancel := context.WithTimeout(context.Background(), o.controllerManager.TimeoutListingProwJobs)
   119  	defer mgrSyncCtxCancel()
   120  	if synced := cluster.GetCache().WaitForCacheSync(mgrSyncCtx); !synced {
   121  		logrus.Fatal("Timed out waiting for cache sync")
   122  	}
   123  
   124  	// start a cron
   125  	cr := cron.New()
   126  	cr.Start()
   127  
   128  	metrics.ExposeMetrics("horologium", configAgent.Config().PushGateway, o.instrumentationOptions.MetricsPort)
   129  
   130  	tickInterval := defaultTickInterval
   131  	if configAgent.Config().Horologium.TickInterval != nil {
   132  		tickInterval = configAgent.Config().Horologium.TickInterval.Duration
   133  	}
   134  	interrupts.TickLiteral(func() {
   135  		start := time.Now()
   136  		if err := sync(cluster.GetClient(), configAgent.Config(), cr, start); err != nil {
   137  			logrus.WithError(err).Error("Error syncing periodic jobs.")
   138  		}
   139  		logrus.WithField("duration", time.Since(start)).Info("Synced periodic jobs")
   140  	}, tickInterval)
   141  }
   142  
   143  type cronClient interface {
   144  	SyncConfig(cfg *config.Config) error
   145  	QueuedJobs() []string
   146  }
   147  
   148  func sync(prowJobClient ctrlruntimeclient.Client, cfg *config.Config, cr cronClient, now time.Time) error {
   149  	jobs := &prowapi.ProwJobList{}
   150  	if err := prowJobClient.List(context.TODO(), jobs, ctrlruntimeclient.InNamespace(cfg.ProwJobNamespace)); err != nil {
   151  		return fmt.Errorf("error listing prow jobs: %w", err)
   152  	}
   153  	latestJobs := pjutil.GetLatestProwJobs(jobs.Items, prowapi.PeriodicJob)
   154  
   155  	if err := cr.SyncConfig(cfg); err != nil {
   156  		logrus.WithError(err).Error("Error syncing cron jobs.")
   157  	}
   158  
   159  	cronTriggers := sets.New[string]()
   160  	for _, job := range cr.QueuedJobs() {
   161  		cronTriggers.Insert(job)
   162  	}
   163  
   164  	var errs []error
   165  	for _, p := range cfg.Periodics {
   166  		j, previousFound := latestJobs[p.Name]
   167  		logger := logrus.WithFields(logrus.Fields{
   168  			"job":            p.Name,
   169  			"previous-found": previousFound,
   170  		})
   171  
   172  		var shouldTrigger = false
   173  		switch {
   174  		case p.Cron == "": // no cron expression is set, we use interval to trigger
   175  			if j.Complete() {
   176  				intervalRef := j.Status.StartTime.Time
   177  				intervalDuration := p.GetInterval()
   178  				if p.MinimumInterval != "" {
   179  					intervalRef = j.Status.CompletionTime.Time
   180  					intervalDuration = p.GetMinimumInterval()
   181  				}
   182  				shouldTrigger = now.Sub(intervalRef) > intervalDuration
   183  			}
   184  		case cronTriggers.Has(p.Name):
   185  			shouldTrigger = j.Complete()
   186  		default:
   187  			if !cronTriggers.Has(p.Name) {
   188  				logger.WithFields(logrus.Fields{
   189  					"previous-found": previousFound,
   190  					"should-trigger": shouldTrigger,
   191  					"name":           p.Name,
   192  					"job":            p.JobBase.Name,
   193  				}).Info("Skipping cron periodic")
   194  			}
   195  			continue
   196  		}
   197  		if !shouldTrigger {
   198  			logger.WithFields(logrus.Fields{
   199  				"previous-found": previousFound,
   200  				"name":           p.Name,
   201  				"job":            p.JobBase.Name,
   202  			}).Debug("Trigger time has not yet been reached.")
   203  		}
   204  		if !previousFound || shouldTrigger {
   205  			prowJob := pjutil.NewProwJob(pjutil.PeriodicSpec(p), p.Labels, p.Annotations,
   206  				pjutil.RequireScheduling(cfg.Scheduler.Enabled))
   207  			prowJob.Namespace = cfg.ProwJobNamespace
   208  			logger.WithFields(logrus.Fields{
   209  				"should-trigger": shouldTrigger,
   210  				"previous-found": previousFound,
   211  			}).WithFields(
   212  				pjutil.ProwJobFields(&prowJob),
   213  			).Info("Triggering new run.")
   214  			if err := prowJobClient.Create(context.TODO(), &prowJob); err != nil {
   215  				errs = append(errs, err)
   216  			}
   217  		}
   218  	}
   219  
   220  	if len(errs) > 0 {
   221  		return fmt.Errorf("failed to create %d prowjobs: %v", len(errs), errs)
   222  	}
   223  	return nil
   224  }