github.com/zppinho/prow@v0.0.0-20240510014325-1738badeb017/cmd/horologium/main.go (about)

     1  /*
     2  Copyright 2017 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package main
    18  
    19  import (
    20  	"context"
    21  	"flag"
    22  	"fmt"
    23  	"os"
    24  	"time"
    25  
    26  	"github.com/sirupsen/logrus"
    27  	"k8s.io/apimachinery/pkg/util/sets"
    28  	"sigs.k8s.io/controller-runtime/pkg/cache"
    29  	ctrlruntimeclient "sigs.k8s.io/controller-runtime/pkg/client"
    30  	"sigs.k8s.io/controller-runtime/pkg/cluster"
    31  
    32  	prowapi "sigs.k8s.io/prow/pkg/apis/prowjobs/v1"
    33  	"sigs.k8s.io/prow/pkg/config"
    34  	"sigs.k8s.io/prow/pkg/cron"
    35  	pkgflagutil "sigs.k8s.io/prow/pkg/flagutil"
    36  	prowflagutil "sigs.k8s.io/prow/pkg/flagutil"
    37  	configflagutil "sigs.k8s.io/prow/pkg/flagutil/config"
    38  	"sigs.k8s.io/prow/pkg/interrupts"
    39  	"sigs.k8s.io/prow/pkg/logrusutil"
    40  	"sigs.k8s.io/prow/pkg/metrics"
    41  	"sigs.k8s.io/prow/pkg/pjutil"
    42  	"sigs.k8s.io/prow/pkg/pjutil/pprof"
    43  )
    44  
    45  const (
    46  	defaultTickInterval = time.Minute
    47  )
    48  
// options holds everything horologium reads from its command line.
//
//   - config is used by main to build the config agent.
//   - kubernetes yields the infrastructure cluster client configuration.
//   - instrumentationOptions is handed to pprof.Instrument and supplies the
//     metrics port.
//   - controllerManager supplies TimeoutListingProwJobs, which bounds the
//     initial cache sync in main.
//   - dryRun is bound to the -dry-run flag ("Whether or not to make mutating
//     API calls to Kubernetes.") and is forwarded to option validation and to
//     the cluster client configuration.
type options struct {
	config configflagutil.ConfigOptions

	kubernetes             prowflagutil.KubernetesOptions
	instrumentationOptions prowflagutil.InstrumentationOptions
	controllerManager      prowflagutil.ControllerManagerOptions
	dryRun                 bool
}
    57  
    58  func gatherOptions(fs *flag.FlagSet, args ...string) options {
    59  	var o options
    60  
    61  	fs.BoolVar(&o.dryRun, "dry-run", true, "Whether or not to make mutating API calls to Kubernetes.")
    62  	o.config.AddFlags(fs)
    63  	o.kubernetes.AddFlags(fs)
    64  	o.instrumentationOptions.AddFlags(fs)
    65  	o.controllerManager.TimeoutListingProwJobsDefault = 60 * time.Second
    66  	o.controllerManager.AddFlags(fs)
    67  
    68  	fs.Parse(args)
    69  	return o
    70  }
    71  
    72  func (o *options) Validate() error {
    73  	for _, group := range []pkgflagutil.OptionGroup{&o.kubernetes, &o.config, &o.controllerManager} {
    74  		if err := group.Validate(o.dryRun); err != nil {
    75  			return err
    76  		}
    77  	}
    78  
    79  	return nil
    80  }
    81  
    82  func main() {
    83  	logrusutil.ComponentInit()
    84  
    85  	o := gatherOptions(flag.NewFlagSet(os.Args[0], flag.ExitOnError), os.Args[1:]...)
    86  	if err := o.Validate(); err != nil {
    87  		logrus.WithError(err).Fatal("Invalid options")
    88  	}
    89  
    90  	defer interrupts.WaitForGracefulShutdown()
    91  
    92  	pprof.Instrument(o.instrumentationOptions)
    93  
    94  	configAgent, err := o.config.ConfigAgent()
    95  	if err != nil {
    96  		logrus.WithError(err).Fatal("Error starting config agent.")
    97  	}
    98  
    99  	cfg, err := o.kubernetes.InfrastructureClusterConfig(o.dryRun)
   100  	if err != nil {
   101  		logrus.WithError(err).Fatal("Failed to get prowjob kubeconfig")
   102  	}
   103  	cluster, err := cluster.New(cfg, func(o *cluster.Options) {
   104  		o.Cache.DefaultNamespaces = map[string]cache.Config{
   105  			configAgent.Config().ProwJobNamespace: {},
   106  		}
   107  	})
   108  	if err != nil {
   109  		logrus.WithError(err).Fatal("Failed to construct prowjob client")
   110  	}
   111  	// Trigger cache creation for ProwJobs so the following cacheSync actually does something. If we don't
   112  	// do this here, the first List request for ProwJobs will transiently trigger cache creation and sync,
   113  	// which doesn't allow us to fail the binary if it doesn't work.
   114  	if _, err := cluster.GetCache().GetInformer(interrupts.Context(), &prowapi.ProwJob{}); err != nil {
   115  		logrus.WithError(err).Fatal("Failed to get a prowjob informer")
   116  	}
   117  	interrupts.Run(func(ctx context.Context) {
   118  		if err := cluster.Start(ctx); err != nil {
   119  			logrus.WithError(err).Fatal("Controller failed to start")
   120  		}
   121  		logrus.Info("Controller finished gracefully.")
   122  	})
   123  	mgrSyncCtx, mgrSyncCtxCancel := context.WithTimeout(context.Background(), o.controllerManager.TimeoutListingProwJobs)
   124  	defer mgrSyncCtxCancel()
   125  	if synced := cluster.GetCache().WaitForCacheSync(mgrSyncCtx); !synced {
   126  		logrus.Fatal("Timed out waiting for cache sync")
   127  	}
   128  
   129  	// start a cron
   130  	cr := cron.New()
   131  	cr.Start()
   132  
   133  	metrics.ExposeMetrics("horologium", configAgent.Config().PushGateway, o.instrumentationOptions.MetricsPort)
   134  
   135  	tickInterval := defaultTickInterval
   136  	if configAgent.Config().Horologium.TickInterval != nil {
   137  		tickInterval = configAgent.Config().Horologium.TickInterval.Duration
   138  	}
   139  	interrupts.TickLiteral(func() {
   140  		start := time.Now()
   141  		if err := sync(cluster.GetClient(), configAgent.Config(), cr, start); err != nil {
   142  			logrus.WithError(err).Error("Error syncing periodic jobs.")
   143  		}
   144  		logrus.WithField("duration", time.Since(start)).Info("Synced periodic jobs")
   145  	}, tickInterval)
   146  }
   147  
// cronClient is the part of the cron agent that sync relies on. main passes
// the value returned by cron.New(); declaring the dependency as an interface
// lets sync run against any other implementation as well.
type cronClient interface {
	// SyncConfig is called by sync with the current configuration on every
	// run. sync logs a returned error and carries on.
	SyncConfig(cfg *config.Config) error
	// QueuedJobs returns job names; sync treats a periodic whose name is in
	// the result as having a pending cron trigger.
	// NOTE(review): whether calling it also clears the queue is not visible
	// here — confirm against the cron package.
	QueuedJobs() []string
}
   152  
   153  func sync(prowJobClient ctrlruntimeclient.Client, cfg *config.Config, cr cronClient, now time.Time) error {
   154  	jobs := &prowapi.ProwJobList{}
   155  	if err := prowJobClient.List(context.TODO(), jobs, ctrlruntimeclient.InNamespace(cfg.ProwJobNamespace)); err != nil {
   156  		return fmt.Errorf("error listing prow jobs: %w", err)
   157  	}
   158  	latestJobs := pjutil.GetLatestProwJobs(jobs.Items, prowapi.PeriodicJob)
   159  
   160  	if err := cr.SyncConfig(cfg); err != nil {
   161  		logrus.WithError(err).Error("Error syncing cron jobs.")
   162  	}
   163  
   164  	cronTriggers := sets.New[string]()
   165  	for _, job := range cr.QueuedJobs() {
   166  		cronTriggers.Insert(job)
   167  	}
   168  
   169  	var errs []error
   170  	for _, p := range cfg.Periodics {
   171  		j, previousFound := latestJobs[p.Name]
   172  		logger := logrus.WithFields(logrus.Fields{
   173  			"job":            p.Name,
   174  			"previous-found": previousFound,
   175  		})
   176  
   177  		var shouldTrigger = false
   178  		switch {
   179  		case p.Cron == "": // no cron expression is set, we use interval to trigger
   180  			if j.Complete() {
   181  				intervalRef := j.Status.StartTime.Time
   182  				intervalDuration := p.GetInterval()
   183  				if p.MinimumInterval != "" {
   184  					intervalRef = j.Status.CompletionTime.Time
   185  					intervalDuration = p.GetMinimumInterval()
   186  				}
   187  				shouldTrigger = now.Sub(intervalRef) > intervalDuration
   188  			}
   189  		case cronTriggers.Has(p.Name):
   190  			shouldTrigger = j.Complete()
   191  		default:
   192  			if !cronTriggers.Has(p.Name) {
   193  				logger.WithFields(logrus.Fields{
   194  					"previous-found": previousFound,
   195  					"should-trigger": shouldTrigger,
   196  					"name":           p.Name,
   197  					"job":            p.JobBase.Name,
   198  				}).Info("Skipping cron periodic")
   199  			}
   200  			continue
   201  		}
   202  		if !shouldTrigger {
   203  			logger.WithFields(logrus.Fields{
   204  				"previous-found": previousFound,
   205  				"name":           p.Name,
   206  				"job":            p.JobBase.Name,
   207  			}).Debug("Trigger time has not yet been reached.")
   208  		}
   209  		if !previousFound || shouldTrigger {
   210  			prowJob := pjutil.NewProwJob(pjutil.PeriodicSpec(p), p.Labels, p.Annotations,
   211  				pjutil.RequireScheduling(cfg.Scheduler.Enabled))
   212  			prowJob.Namespace = cfg.ProwJobNamespace
   213  			logger.WithFields(logrus.Fields{
   214  				"should-trigger": shouldTrigger,
   215  				"previous-found": previousFound,
   216  			}).WithFields(
   217  				pjutil.ProwJobFields(&prowJob),
   218  			).Info("Triggering new run.")
   219  			if err := prowJobClient.Create(context.TODO(), &prowJob); err != nil {
   220  				errs = append(errs, err)
   221  			}
   222  		}
   223  	}
   224  
   225  	if len(errs) > 0 {
   226  		return fmt.Errorf("failed to create %d prowjobs: %v", len(errs), errs)
   227  	}
   228  	return nil
   229  }