sigs.k8s.io/prow@v0.0.0-20240503223140-c5e374dc7eb1/cmd/horologium/main.go (about) 1 /* 2 Copyright 2017 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package main 18 19 import ( 20 "context" 21 "flag" 22 "fmt" 23 "os" 24 "time" 25 26 "github.com/sirupsen/logrus" 27 "k8s.io/apimachinery/pkg/util/sets" 28 ctrlruntimeclient "sigs.k8s.io/controller-runtime/pkg/client" 29 "sigs.k8s.io/controller-runtime/pkg/cluster" 30 31 prowapi "sigs.k8s.io/prow/pkg/apis/prowjobs/v1" 32 "sigs.k8s.io/prow/pkg/config" 33 "sigs.k8s.io/prow/pkg/cron" 34 pkgflagutil "sigs.k8s.io/prow/pkg/flagutil" 35 prowflagutil "sigs.k8s.io/prow/pkg/flagutil" 36 configflagutil "sigs.k8s.io/prow/pkg/flagutil/config" 37 "sigs.k8s.io/prow/pkg/interrupts" 38 "sigs.k8s.io/prow/pkg/logrusutil" 39 "sigs.k8s.io/prow/pkg/metrics" 40 "sigs.k8s.io/prow/pkg/pjutil" 41 "sigs.k8s.io/prow/pkg/pjutil/pprof" 42 ) 43 44 const ( 45 defaultTickInterval = time.Minute 46 ) 47 48 type options struct { 49 config configflagutil.ConfigOptions 50 51 kubernetes prowflagutil.KubernetesOptions 52 instrumentationOptions prowflagutil.InstrumentationOptions 53 controllerManager prowflagutil.ControllerManagerOptions 54 dryRun bool 55 } 56 57 func gatherOptions(fs *flag.FlagSet, args ...string) options { 58 var o options 59 60 fs.BoolVar(&o.dryRun, "dry-run", true, "Whether or not to make mutating API calls to Kubernetes.") 61 o.config.AddFlags(fs) 62 o.kubernetes.AddFlags(fs) 63 o.instrumentationOptions.AddFlags(fs) 64 o.controllerManager.TimeoutListingProwJobsDefault = 60 * time.Second 65 o.controllerManager.AddFlags(fs) 66 67 fs.Parse(args) 68 return o 69 } 70 71 func (o *options) Validate() error { 72 for _, group := range []pkgflagutil.OptionGroup{&o.kubernetes, &o.config, &o.controllerManager} { 73 if err := group.Validate(o.dryRun); err != nil { 74 return err 75 } 76 } 77 78 return nil 79 } 80 81 func main() { 82 logrusutil.ComponentInit() 83 84 o := gatherOptions(flag.NewFlagSet(os.Args[0], flag.ExitOnError), os.Args[1:]...) 85 if err := o.Validate(); err != nil { 86 logrus.WithError(err).Fatal("Invalid options") 87 } 88 89 defer interrupts.WaitForGracefulShutdown() 90 91 pprof.Instrument(o.instrumentationOptions) 92 93 configAgent, err := o.config.ConfigAgent() 94 if err != nil { 95 logrus.WithError(err).Fatal("Error starting config agent.") 96 } 97 98 cfg, err := o.kubernetes.InfrastructureClusterConfig(o.dryRun) 99 if err != nil { 100 logrus.WithError(err).Fatal("Failed to get prowjob kubeconfig") 101 } 102 cluster, err := cluster.New(cfg, func(o *cluster.Options) { o.Namespace = configAgent.Config().ProwJobNamespace }) 103 if err != nil { 104 logrus.WithError(err).Fatal("Failed to construct prowjob client") 105 } 106 // Trigger cache creation for ProwJobs so the following cacheSync actually does something. If we don't 107 // do this here, the first List request for ProwJobs will transiently trigger cache creation and sync, 108 // which doesn't allow us to fail the binary if it doesn't work. 109 if _, err := cluster.GetCache().GetInformer(interrupts.Context(), &prowapi.ProwJob{}); err != nil { 110 logrus.WithError(err).Fatal("Failed to get a prowjob informer") 111 } 112 interrupts.Run(func(ctx context.Context) { 113 if err := cluster.Start(ctx); err != nil { 114 logrus.WithError(err).Fatal("Controller failed to start") 115 } 116 logrus.Info("Controller finished gracefully.") 117 }) 118 mgrSyncCtx, mgrSyncCtxCancel := context.WithTimeout(context.Background(), o.controllerManager.TimeoutListingProwJobs) 119 defer mgrSyncCtxCancel() 120 if synced := cluster.GetCache().WaitForCacheSync(mgrSyncCtx); !synced { 121 logrus.Fatal("Timed out waiting for cache sync") 122 } 123 124 // start a cron 125 cr := cron.New() 126 cr.Start() 127 128 metrics.ExposeMetrics("horologium", configAgent.Config().PushGateway, o.instrumentationOptions.MetricsPort) 129 130 tickInterval := defaultTickInterval 131 if configAgent.Config().Horologium.TickInterval != nil { 132 tickInterval = configAgent.Config().Horologium.TickInterval.Duration 133 } 134 interrupts.TickLiteral(func() { 135 start := time.Now() 136 if err := sync(cluster.GetClient(), configAgent.Config(), cr, start); err != nil { 137 logrus.WithError(err).Error("Error syncing periodic jobs.") 138 } 139 logrus.WithField("duration", time.Since(start)).Info("Synced periodic jobs") 140 }, tickInterval) 141 } 142 143 type cronClient interface { 144 SyncConfig(cfg *config.Config) error 145 QueuedJobs() []string 146 } 147 148 func sync(prowJobClient ctrlruntimeclient.Client, cfg *config.Config, cr cronClient, now time.Time) error { 149 jobs := &prowapi.ProwJobList{} 150 if err := prowJobClient.List(context.TODO(), jobs, ctrlruntimeclient.InNamespace(cfg.ProwJobNamespace)); err != nil { 151 return fmt.Errorf("error listing prow jobs: %w", err) 152 } 153 latestJobs := pjutil.GetLatestProwJobs(jobs.Items, prowapi.PeriodicJob) 154 155 if err := cr.SyncConfig(cfg); err != nil { 156 logrus.WithError(err).Error("Error syncing cron jobs.") 157 } 158 159 cronTriggers := sets.New[string]() 160 for _, job := range cr.QueuedJobs() { 161 cronTriggers.Insert(job) 162 } 163 164 var errs []error 165 for _, p := range cfg.Periodics { 166 j, previousFound := latestJobs[p.Name] 167 logger := logrus.WithFields(logrus.Fields{ 168 "job": p.Name, 169 "previous-found": previousFound, 170 }) 171 172 var shouldTrigger = false 173 switch { 174 case p.Cron == "": // no cron expression is set, we use interval to trigger 175 if j.Complete() { 176 intervalRef := j.Status.StartTime.Time 177 intervalDuration := p.GetInterval() 178 if p.MinimumInterval != "" { 179 intervalRef = j.Status.CompletionTime.Time 180 intervalDuration = p.GetMinimumInterval() 181 } 182 shouldTrigger = now.Sub(intervalRef) > intervalDuration 183 } 184 case cronTriggers.Has(p.Name): 185 shouldTrigger = j.Complete() 186 default: 187 if !cronTriggers.Has(p.Name) { 188 logger.WithFields(logrus.Fields{ 189 "previous-found": previousFound, 190 "should-trigger": shouldTrigger, 191 "name": p.Name, 192 "job": p.JobBase.Name, 193 }).Info("Skipping cron periodic") 194 } 195 continue 196 } 197 if !shouldTrigger { 198 logger.WithFields(logrus.Fields{ 199 "previous-found": previousFound, 200 "name": p.Name, 201 "job": p.JobBase.Name, 202 }).Debug("Trigger time has not yet been reached.") 203 } 204 if !previousFound || shouldTrigger { 205 prowJob := pjutil.NewProwJob(pjutil.PeriodicSpec(p), p.Labels, p.Annotations, 206 pjutil.RequireScheduling(cfg.Scheduler.Enabled)) 207 prowJob.Namespace = cfg.ProwJobNamespace 208 logger.WithFields(logrus.Fields{ 209 "should-trigger": shouldTrigger, 210 "previous-found": previousFound, 211 }).WithFields( 212 pjutil.ProwJobFields(&prowJob), 213 ).Info("Triggering new run.") 214 if err := prowJobClient.Create(context.TODO(), &prowJob); err != nil { 215 errs = append(errs, err) 216 } 217 } 218 } 219 220 if len(errs) > 0 { 221 return fmt.Errorf("failed to create %d prowjobs: %v", len(errs), errs) 222 } 223 return nil 224 }