github.com/zppinho/prow@v0.0.0-20240510014325-1738badeb017/cmd/horologium/main.go (about) 1 /* 2 Copyright 2017 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package main 18 19 import ( 20 "context" 21 "flag" 22 "fmt" 23 "os" 24 "time" 25 26 "github.com/sirupsen/logrus" 27 "k8s.io/apimachinery/pkg/util/sets" 28 "sigs.k8s.io/controller-runtime/pkg/cache" 29 ctrlruntimeclient "sigs.k8s.io/controller-runtime/pkg/client" 30 "sigs.k8s.io/controller-runtime/pkg/cluster" 31 32 prowapi "sigs.k8s.io/prow/pkg/apis/prowjobs/v1" 33 "sigs.k8s.io/prow/pkg/config" 34 "sigs.k8s.io/prow/pkg/cron" 35 pkgflagutil "sigs.k8s.io/prow/pkg/flagutil" 36 prowflagutil "sigs.k8s.io/prow/pkg/flagutil" 37 configflagutil "sigs.k8s.io/prow/pkg/flagutil/config" 38 "sigs.k8s.io/prow/pkg/interrupts" 39 "sigs.k8s.io/prow/pkg/logrusutil" 40 "sigs.k8s.io/prow/pkg/metrics" 41 "sigs.k8s.io/prow/pkg/pjutil" 42 "sigs.k8s.io/prow/pkg/pjutil/pprof" 43 ) 44 45 const ( 46 defaultTickInterval = time.Minute 47 ) 48 49 type options struct { 50 config configflagutil.ConfigOptions 51 52 kubernetes prowflagutil.KubernetesOptions 53 instrumentationOptions prowflagutil.InstrumentationOptions 54 controllerManager prowflagutil.ControllerManagerOptions 55 dryRun bool 56 } 57 58 func gatherOptions(fs *flag.FlagSet, args ...string) options { 59 var o options 60 61 fs.BoolVar(&o.dryRun, "dry-run", true, "Whether or not to make mutating API calls to Kubernetes.") 62 o.config.AddFlags(fs) 63 o.kubernetes.AddFlags(fs) 64 o.instrumentationOptions.AddFlags(fs) 65 o.controllerManager.TimeoutListingProwJobsDefault = 60 * time.Second 66 o.controllerManager.AddFlags(fs) 67 68 fs.Parse(args) 69 return o 70 } 71 72 func (o *options) Validate() error { 73 for _, group := range []pkgflagutil.OptionGroup{&o.kubernetes, &o.config, &o.controllerManager} { 74 if err := group.Validate(o.dryRun); err != nil { 75 return err 76 } 77 } 78 79 return nil 80 } 81 82 func main() { 83 logrusutil.ComponentInit() 84 85 o := gatherOptions(flag.NewFlagSet(os.Args[0], flag.ExitOnError), os.Args[1:]...) 86 if err := o.Validate(); err != nil { 87 logrus.WithError(err).Fatal("Invalid options") 88 } 89 90 defer interrupts.WaitForGracefulShutdown() 91 92 pprof.Instrument(o.instrumentationOptions) 93 94 configAgent, err := o.config.ConfigAgent() 95 if err != nil { 96 logrus.WithError(err).Fatal("Error starting config agent.") 97 } 98 99 cfg, err := o.kubernetes.InfrastructureClusterConfig(o.dryRun) 100 if err != nil { 101 logrus.WithError(err).Fatal("Failed to get prowjob kubeconfig") 102 } 103 cluster, err := cluster.New(cfg, func(o *cluster.Options) { 104 o.Cache.DefaultNamespaces = map[string]cache.Config{ 105 configAgent.Config().ProwJobNamespace: {}, 106 } 107 }) 108 if err != nil { 109 logrus.WithError(err).Fatal("Failed to construct prowjob client") 110 } 111 // Trigger cache creation for ProwJobs so the following cacheSync actually does something. If we don't 112 // do this here, the first List request for ProwJobs will transiently trigger cache creation and sync, 113 // which doesn't allow us to fail the binary if it doesn't work. 114 if _, err := cluster.GetCache().GetInformer(interrupts.Context(), &prowapi.ProwJob{}); err != nil { 115 logrus.WithError(err).Fatal("Failed to get a prowjob informer") 116 } 117 interrupts.Run(func(ctx context.Context) { 118 if err := cluster.Start(ctx); err != nil { 119 logrus.WithError(err).Fatal("Controller failed to start") 120 } 121 logrus.Info("Controller finished gracefully.") 122 }) 123 mgrSyncCtx, mgrSyncCtxCancel := context.WithTimeout(context.Background(), o.controllerManager.TimeoutListingProwJobs) 124 defer mgrSyncCtxCancel() 125 if synced := cluster.GetCache().WaitForCacheSync(mgrSyncCtx); !synced { 126 logrus.Fatal("Timed out waiting for cache sync") 127 } 128 129 // start a cron 130 cr := cron.New() 131 cr.Start() 132 133 metrics.ExposeMetrics("horologium", configAgent.Config().PushGateway, o.instrumentationOptions.MetricsPort) 134 135 tickInterval := defaultTickInterval 136 if configAgent.Config().Horologium.TickInterval != nil { 137 tickInterval = configAgent.Config().Horologium.TickInterval.Duration 138 } 139 interrupts.TickLiteral(func() { 140 start := time.Now() 141 if err := sync(cluster.GetClient(), configAgent.Config(), cr, start); err != nil { 142 logrus.WithError(err).Error("Error syncing periodic jobs.") 143 } 144 logrus.WithField("duration", time.Since(start)).Info("Synced periodic jobs") 145 }, tickInterval) 146 } 147 148 type cronClient interface { 149 SyncConfig(cfg *config.Config) error 150 QueuedJobs() []string 151 } 152 153 func sync(prowJobClient ctrlruntimeclient.Client, cfg *config.Config, cr cronClient, now time.Time) error { 154 jobs := &prowapi.ProwJobList{} 155 if err := prowJobClient.List(context.TODO(), jobs, ctrlruntimeclient.InNamespace(cfg.ProwJobNamespace)); err != nil { 156 return fmt.Errorf("error listing prow jobs: %w", err) 157 } 158 latestJobs := pjutil.GetLatestProwJobs(jobs.Items, prowapi.PeriodicJob) 159 160 if err := cr.SyncConfig(cfg); err != nil { 161 logrus.WithError(err).Error("Error syncing cron jobs.") 162 } 163 164 cronTriggers := sets.New[string]() 165 for _, job := range cr.QueuedJobs() { 166 cronTriggers.Insert(job) 167 } 168 169 var errs []error 170 for _, p := range cfg.Periodics { 171 j, previousFound := latestJobs[p.Name] 172 logger := logrus.WithFields(logrus.Fields{ 173 "job": p.Name, 174 "previous-found": previousFound, 175 }) 176 177 var shouldTrigger = false 178 switch { 179 case p.Cron == "": // no cron expression is set, we use interval to trigger 180 if j.Complete() { 181 intervalRef := j.Status.StartTime.Time 182 intervalDuration := p.GetInterval() 183 if p.MinimumInterval != "" { 184 intervalRef = j.Status.CompletionTime.Time 185 intervalDuration = p.GetMinimumInterval() 186 } 187 shouldTrigger = now.Sub(intervalRef) > intervalDuration 188 } 189 case cronTriggers.Has(p.Name): 190 shouldTrigger = j.Complete() 191 default: 192 if !cronTriggers.Has(p.Name) { 193 logger.WithFields(logrus.Fields{ 194 "previous-found": previousFound, 195 "should-trigger": shouldTrigger, 196 "name": p.Name, 197 "job": p.JobBase.Name, 198 }).Info("Skipping cron periodic") 199 } 200 continue 201 } 202 if !shouldTrigger { 203 logger.WithFields(logrus.Fields{ 204 "previous-found": previousFound, 205 "name": p.Name, 206 "job": p.JobBase.Name, 207 }).Debug("Trigger time has not yet been reached.") 208 } 209 if !previousFound || shouldTrigger { 210 prowJob := pjutil.NewProwJob(pjutil.PeriodicSpec(p), p.Labels, p.Annotations, 211 pjutil.RequireScheduling(cfg.Scheduler.Enabled)) 212 prowJob.Namespace = cfg.ProwJobNamespace 213 logger.WithFields(logrus.Fields{ 214 "should-trigger": shouldTrigger, 215 "previous-found": previousFound, 216 }).WithFields( 217 pjutil.ProwJobFields(&prowJob), 218 ).Info("Triggering new run.") 219 if err := prowJobClient.Create(context.TODO(), &prowJob); err != nil { 220 errs = append(errs, err) 221 } 222 } 223 } 224 225 if len(errs) > 0 { 226 return fmt.Errorf("failed to create %d prowjobs: %v", len(errs), errs) 227 } 228 return nil 229 }