github.com/munnerz/test-infra@v0.0.0-20190108210205-ce3d181dc989/prow/cmd/build/controller.go (about) 1 /* 2 Copyright 2018 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package main 18 19 import ( 20 "errors" 21 "fmt" 22 "strings" 23 "time" 24 25 prowjobv1 "k8s.io/test-infra/prow/apis/prowjobs/v1" 26 prowjobset "k8s.io/test-infra/prow/client/clientset/versioned" 27 prowjobscheme "k8s.io/test-infra/prow/client/clientset/versioned/scheme" 28 prowjobinfov1 "k8s.io/test-infra/prow/client/informers/externalversions/prowjobs/v1" 29 prowjoblisters "k8s.io/test-infra/prow/client/listers/prowjobs/v1" 30 "k8s.io/test-infra/prow/kube" 31 "k8s.io/test-infra/prow/pjutil" 32 "k8s.io/test-infra/prow/pod-utils/clone" 33 "k8s.io/test-infra/prow/pod-utils/decorate" 34 "k8s.io/test-infra/prow/pod-utils/downwardapi" 35 36 buildv1alpha1 "github.com/knative/build/pkg/apis/build/v1alpha1" 37 duckv1alpha1 "github.com/knative/pkg/apis/duck/v1alpha1" 38 "github.com/sirupsen/logrus" 39 untypedcorev1 "k8s.io/api/core/v1" 40 apierrors "k8s.io/apimachinery/pkg/api/errors" 41 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 42 "k8s.io/apimachinery/pkg/runtime/schema" 43 "k8s.io/apimachinery/pkg/util/runtime" 44 "k8s.io/apimachinery/pkg/util/sets" 45 "k8s.io/apimachinery/pkg/util/wait" 46 "k8s.io/client-go/kubernetes" 47 "k8s.io/client-go/kubernetes/scheme" 48 corev1 "k8s.io/client-go/kubernetes/typed/core/v1" 49 "k8s.io/client-go/tools/cache" 50 "k8s.io/client-go/tools/record" 51 ) 52 53 const ( 54 controllerName = "prow-build-crd" 55 ) 56 57 type limiter interface { 58 ShutDown() 59 Get() (interface{}, bool) 60 Done(interface{}) 61 Forget(interface{}) 62 AddRateLimited(interface{}) 63 } 64 65 type controller struct { 66 pjNamespace string 67 pjc prowjobset.Interface 68 builds map[string]buildConfig 69 totURL string 70 71 pjLister prowjoblisters.ProwJobLister 72 pjInformer cache.SharedIndexInformer 73 74 workqueue limiter 75 76 recorder record.EventRecorder 77 78 prowJobsDone bool 79 buildsDone map[string]bool 80 wait string 81 } 82 83 // hasSynced returns true when every prowjob and build informer has synced. 84 func (c *controller) hasSynced() bool { 85 if !c.pjInformer.HasSynced() { 86 if c.wait != "prowjobs" { 87 c.wait = "prowjobs" 88 ns := c.pjNamespace 89 if ns == "" { 90 ns = "controller's" 91 } 92 logrus.Infof("Waiting on prowjobs in %s namespace...", ns) 93 } 94 return false // still syncing prowjobs 95 } 96 if !c.prowJobsDone { 97 c.prowJobsDone = true 98 logrus.Info("Synced prow jobs") 99 } 100 if c.buildsDone == nil { 101 c.buildsDone = map[string]bool{} 102 } 103 for n, cfg := range c.builds { 104 if !cfg.informer.Informer().HasSynced() { 105 if c.wait != n { 106 c.wait = n 107 logrus.Infof("Waiting on %s builds...", n) 108 } 109 return false // still syncing builds in at least one cluster 110 } else if !c.buildsDone[n] { 111 c.buildsDone[n] = true 112 logrus.Infof("Synced %s builds", n) 113 } 114 } 115 return true // Everyone is synced 116 } 117 118 func newController(kc kubernetes.Interface, pjc prowjobset.Interface, pji prowjobinfov1.ProwJobInformer, buildConfigs map[string]buildConfig, totURL, pjNamespace string, rl limiter) *controller { 119 // Log to events 120 prowjobscheme.AddToScheme(scheme.Scheme) 121 eventBroadcaster := record.NewBroadcaster() 122 eventBroadcaster.StartLogging(logrus.Infof) 123 eventBroadcaster.StartRecordingToSink(&corev1.EventSinkImpl{Interface: kc.CoreV1().Events("")}) 124 recorder := eventBroadcaster.NewRecorder(scheme.Scheme, untypedcorev1.EventSource{Component: controllerName}) 125 126 // Create struct 127 c := &controller{ 128 pjc: pjc, 129 builds: buildConfigs, 130 pjLister: pji.Lister(), 131 pjInformer: pji.Informer(), 132 workqueue: rl, 133 recorder: recorder, 134 totURL: totURL, 135 pjNamespace: pjNamespace, 136 } 137 138 logrus.Info("Setting up event handlers") 139 140 // Reconcile whenever a prowjob changes 141 pji.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ 142 AddFunc: func(obj interface{}) { 143 pj, ok := obj.(*prowjobv1.ProwJob) 144 if !ok { 145 logrus.Warnf("Ignoring bad prowjob add: %v", obj) 146 return 147 } 148 c.enqueueKey(pj.Spec.Cluster, pj) 149 }, 150 UpdateFunc: func(old, new interface{}) { 151 pj, ok := new.(*prowjobv1.ProwJob) 152 if !ok { 153 logrus.Warnf("Ignoring bad prowjob update: %v", new) 154 return 155 } 156 c.enqueueKey(pj.Spec.Cluster, pj) 157 }, 158 DeleteFunc: func(obj interface{}) { 159 pj, ok := obj.(*prowjobv1.ProwJob) 160 if !ok { 161 logrus.Warnf("Ignoring bad prowjob delete: %v", obj) 162 return 163 } 164 c.enqueueKey(pj.Spec.Cluster, pj) 165 }, 166 }) 167 168 for ctx, cfg := range buildConfigs { 169 // Reconcile whenever a build changes. 170 ctx := ctx // otherwise it will change 171 cfg.informer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ 172 AddFunc: func(obj interface{}) { 173 c.enqueueKey(ctx, obj) 174 }, 175 UpdateFunc: func(old, new interface{}) { 176 c.enqueueKey(ctx, new) 177 }, 178 DeleteFunc: func(obj interface{}) { 179 c.enqueueKey(ctx, obj) 180 }, 181 }) 182 } 183 184 return c 185 } 186 187 // Run starts threads workers, returning after receiving a stop signal. 188 func (c *controller) Run(threads int, stop <-chan struct{}) error { 189 defer runtime.HandleCrash() 190 defer c.workqueue.ShutDown() 191 192 logrus.Info("Starting Build controller") 193 logrus.Info("Waiting for informer caches to sync") 194 if ok := cache.WaitForCacheSync(stop, c.hasSynced); !ok { 195 return fmt.Errorf("failed to wait for caches to sync") 196 } 197 198 logrus.Info("Starting workers") 199 for i := 0; i < threads; i++ { 200 go wait.Until(c.runWorker, time.Second, stop) 201 } 202 203 logrus.Info("Started workers") 204 <-stop 205 logrus.Info("Shutting down workers") 206 return nil 207 } 208 209 // runWorker dequeues to reconcile, until the queue has closed. 210 func (c *controller) runWorker() { 211 for { 212 key, shutdown := c.workqueue.Get() 213 if shutdown { 214 return 215 } 216 func() { 217 defer c.workqueue.Done(key) 218 219 if err := reconcile(c, key.(string)); err != nil { 220 runtime.HandleError(fmt.Errorf("failed to reconcile %s: %v", key, err)) 221 return // Do not forget so we retry later. 222 } 223 c.workqueue.Forget(key) 224 }() 225 } 226 } 227 228 // toKey returns context/namespace/name 229 func toKey(ctx, namespace, name string) string { 230 return strings.Join([]string{ctx, namespace, name}, "/") 231 } 232 233 // fromKey converts toKey back into its parts 234 func fromKey(key string) (string, string, string, error) { 235 parts := strings.Split(key, "/") 236 if len(parts) != 3 { 237 return "", "", "", fmt.Errorf("bad key: %q", key) 238 } 239 return parts[0], parts[1], parts[2], nil 240 } 241 242 // enqueueKey schedules an item for reconciliation. 243 func (c *controller) enqueueKey(ctx string, obj interface{}) { 244 switch o := obj.(type) { 245 case *prowjobv1.ProwJob: 246 c.workqueue.AddRateLimited(toKey(ctx, o.Spec.Namespace, o.Name)) 247 case *buildv1alpha1.Build: 248 c.workqueue.AddRateLimited(toKey(ctx, o.Namespace, o.Name)) 249 default: 250 logrus.Warnf("cannot enqueue unknown type %T: %v", o, obj) 251 return 252 } 253 } 254 255 type reconciler interface { 256 getProwJob(name string) (*prowjobv1.ProwJob, error) 257 getBuild(context, namespace, name string) (*buildv1alpha1.Build, error) 258 deleteBuild(context, namespace, name string) error 259 createBuild(context, namespace string, b *buildv1alpha1.Build) (*buildv1alpha1.Build, error) 260 updateProwJob(pj *prowjobv1.ProwJob) (*prowjobv1.ProwJob, error) 261 now() metav1.Time 262 buildID(prowjobv1.ProwJob) (string, error) 263 } 264 265 func (c *controller) getProwJob(name string) (*prowjobv1.ProwJob, error) { 266 return c.pjLister.ProwJobs(c.pjNamespace).Get(name) 267 } 268 269 func (c *controller) updateProwJob(pj *prowjobv1.ProwJob) (*prowjobv1.ProwJob, error) { 270 logrus.Debugf("updateProwJob(%s)", pj.Name) 271 return c.pjc.ProwV1().ProwJobs(c.pjNamespace).Update(pj) 272 } 273 274 func (c *controller) getBuild(context, namespace, name string) (*buildv1alpha1.Build, error) { 275 b, ok := c.builds[context] 276 if !ok { 277 return nil, errors.New("context not found") 278 } 279 return b.informer.Lister().Builds(namespace).Get(name) 280 } 281 func (c *controller) deleteBuild(context, namespace, name string) error { 282 logrus.Debugf("deleteBuild(%s,%s,%s)", context, namespace, name) 283 b, ok := c.builds[context] 284 if !ok { 285 return errors.New("context not found") 286 } 287 return b.client.BuildV1alpha1().Builds(namespace).Delete(name, &metav1.DeleteOptions{}) 288 } 289 func (c *controller) createBuild(context, namespace string, b *buildv1alpha1.Build) (*buildv1alpha1.Build, error) { 290 logrus.Debugf("createBuild(%s,%s,%s)", context, namespace, b.Name) 291 bc, ok := c.builds[context] 292 if !ok { 293 return nil, errors.New("context not found") 294 } 295 return bc.client.BuildV1alpha1().Builds(namespace).Create(b) 296 } 297 func (c *controller) now() metav1.Time { 298 return metav1.Now() 299 } 300 301 func (c *controller) buildID(pj prowjobv1.ProwJob) (string, error) { 302 return pjutil.GetBuildID(pj.Spec.Job, c.totURL) 303 } 304 305 var ( 306 groupVersionKind = schema.GroupVersionKind{ 307 Group: prowjobv1.SchemeGroupVersion.Group, 308 Version: prowjobv1.SchemeGroupVersion.Version, 309 Kind: "ProwJob", 310 } 311 ) 312 313 // reconcile ensures a knative-build prowjob has a corresponding build, updating the prowjob's status as the build progresses. 314 func reconcile(c reconciler, key string) error { 315 ctx, namespace, name, err := fromKey(key) 316 if err != nil { 317 runtime.HandleError(err) 318 return nil 319 } 320 321 var wantBuild bool 322 323 pj, err := c.getProwJob(name) 324 switch { 325 case apierrors.IsNotFound(err): 326 // Do not want build 327 case err != nil: 328 return fmt.Errorf("get prowjob: %v", err) 329 case pj.Spec.Agent != prowjobv1.KnativeBuildAgent: 330 // Do not want a build for this job 331 case pj.Spec.Cluster != ctx: 332 // Build is in wrong cluster, we do not want this build 333 logrus.Warnf("%s found in context %s not %s", key, ctx, pj.Spec.Cluster) 334 case pj.DeletionTimestamp == nil: 335 wantBuild = true 336 } 337 338 var haveBuild bool 339 340 // TODO(fejta): make trigger set the expected Namespace for the pod/build. 341 b, err := c.getBuild(ctx, namespace, name) 342 switch { 343 case apierrors.IsNotFound(err): 344 // Do not have a build 345 case err != nil: 346 return fmt.Errorf("get build %s: %v", key, err) 347 case b.DeletionTimestamp == nil: 348 haveBuild = true 349 } 350 351 // Should we create or delete this build? 352 switch { 353 case !wantBuild: 354 if !haveBuild { 355 if pj != nil && pj.Spec.Agent == prowjobv1.KnativeBuildAgent { 356 logrus.Infof("Observed deleted %s", key) 357 } 358 return nil 359 } 360 switch v, ok := b.Labels[kube.CreatedByProw]; { 361 case !ok, v != "true": // Not controlled by this 362 return nil 363 } 364 logrus.Infof("Delete builds/%s", key) 365 if err = c.deleteBuild(ctx, namespace, name); err != nil { 366 return fmt.Errorf("delete build: %v", err) 367 } 368 return nil 369 case finalState(pj.Status.State): 370 logrus.Infof("Observed finished %s", key) 371 return nil 372 case wantBuild && pj.Spec.BuildSpec == nil: 373 return errors.New("nil BuildSpec") 374 case wantBuild && !haveBuild: 375 id, err := c.buildID(*pj) 376 if err != nil { 377 return fmt.Errorf("failed to get build id: %v", err) 378 } 379 if b, err = makeBuild(*pj, id); err != nil { 380 return fmt.Errorf("make build: %v", err) 381 } 382 logrus.Infof("Create builds/%s", key) 383 if b, err = c.createBuild(ctx, namespace, b); err != nil { 384 return fmt.Errorf("create build: %v", err) 385 } 386 } 387 388 // Ensure prowjob status is correct 389 haveState := pj.Status.State 390 haveMsg := pj.Status.Description 391 wantState, wantMsg := prowJobStatus(b.Status) 392 if haveState != wantState || haveMsg != wantMsg { 393 npj := pj.DeepCopy() 394 if npj.Status.StartTime.IsZero() { 395 npj.Status.StartTime = c.now() 396 } 397 if npj.Status.CompletionTime.IsZero() && finalState(wantState) { 398 now := c.now() 399 npj.Status.CompletionTime = &now 400 } 401 npj.Status.State = wantState 402 npj.Status.Description = wantMsg 403 logrus.Infof("Update prowjobs/%s", key) 404 if _, err = c.updateProwJob(npj); err != nil { 405 return fmt.Errorf("update prow status: %v", err) 406 } 407 } 408 return nil 409 } 410 411 // finalState returns true if the prowjob has already finished 412 func finalState(status prowjobv1.ProwJobState) bool { 413 switch status { 414 case "", prowjobv1.PendingState, prowjobv1.TriggeredState: 415 return false 416 } 417 return true 418 } 419 420 // description computes the ProwJobStatus description for this condition or falling back to a default if none is provided. 421 func description(cond duckv1alpha1.Condition, fallback string) string { 422 switch { 423 case cond.Message != "": 424 return cond.Message 425 case cond.Reason != "": 426 return cond.Reason 427 } 428 return fallback 429 } 430 431 const ( 432 descScheduling = "scheduling" 433 descInitializing = "initializing" 434 descRunning = "running" 435 descSucceeded = "succeeded" 436 descFailed = "failed" 437 descUnknown = "unknown status" 438 descMissingCondition = "missing end condition" 439 ) 440 441 // prowJobStatus returns the desired state and description based on the build status. 442 func prowJobStatus(bs buildv1alpha1.BuildStatus) (prowjobv1.ProwJobState, string) { 443 started := bs.StartTime 444 finished := bs.CompletionTime 445 pcond := bs.GetCondition(buildv1alpha1.BuildSucceeded) 446 if pcond == nil { 447 if !finished.IsZero() { 448 return prowjobv1.ErrorState, descMissingCondition 449 } 450 return prowjobv1.TriggeredState, descScheduling 451 } 452 cond := *pcond 453 switch { 454 case cond.Status == untypedcorev1.ConditionTrue: 455 return prowjobv1.SuccessState, description(cond, descSucceeded) 456 case cond.Status == untypedcorev1.ConditionFalse: 457 return prowjobv1.FailureState, description(cond, descFailed) 458 case started.IsZero(): 459 return prowjobv1.TriggeredState, description(cond, descInitializing) 460 case cond.Status == untypedcorev1.ConditionUnknown, finished.IsZero(): 461 return prowjobv1.PendingState, description(cond, descRunning) 462 } 463 logrus.Warnf("Unknown condition %#v", cond) 464 return prowjobv1.ErrorState, description(cond, descUnknown) // shouldn't happen 465 } 466 467 // TODO(fejta): knative/build convert package should export "workspace", "home", "/workspace" 468 // https://github.com/knative/build/blob/17e8cf8417e1ef3d29bd465d4f45ad19dd3a3f2c/pkg/builder/cluster/convert/convert.go#L39-L65 469 const ( 470 workspaceMountName = "workspace" 471 homeMountName = "home" 472 workspaceMountPath = "/workspace" 473 ) 474 475 var ( 476 codeMount = untypedcorev1.VolumeMount{ 477 Name: workspaceMountName, 478 MountPath: "/code-mount", // should be irrelevant 479 } 480 logMount = untypedcorev1.VolumeMount{ 481 Name: homeMountName, 482 MountPath: "/var/prow-build-log", // should be irrelevant 483 } 484 ) 485 486 func buildMeta(pj prowjobv1.ProwJob) metav1.ObjectMeta { 487 podLabels, annotations := decorate.LabelsAndAnnotationsForJob(pj) 488 return metav1.ObjectMeta{ 489 Annotations: annotations, 490 Name: pj.Name, 491 Namespace: pj.Spec.Namespace, 492 Labels: podLabels, 493 } 494 } 495 496 // buildEnv constructs the environment map for the job 497 func buildEnv(pj prowjobv1.ProwJob, buildID string) (map[string]string, error) { 498 return downwardapi.EnvForSpec(downwardapi.NewJobSpec(pj.Spec, buildID, pj.Name)) 499 } 500 501 // defaultArguments will append each arg to the template, except where the argument name is already defined. 502 func defaultArguments(t *buildv1alpha1.TemplateInstantiationSpec, rawEnv map[string]string) { 503 keys := sets.String{} 504 for _, arg := range t.Arguments { 505 keys.Insert(arg.Name) 506 } 507 for k, v := range rawEnv { 508 if keys.Has(k) { 509 continue 510 } 511 t.Arguments = append(t.Arguments, buildv1alpha1.ArgumentSpec{Name: k, Value: v}) 512 } 513 } 514 515 // defaultEnv adds the map of environment variables to the container, except keys already defined. 516 func defaultEnv(c *untypedcorev1.Container, rawEnv map[string]string) { 517 keys := sets.String{} 518 for _, arg := range c.Env { 519 keys.Insert(arg.Name) 520 } 521 for k, v := range rawEnv { 522 if keys.Has(k) { 523 continue 524 } 525 c.Env = append(c.Env, untypedcorev1.EnvVar{Name: k, Value: v}) 526 } 527 } 528 529 // injectEnvironment will add rawEnv to the build steps and/or template arguments. 530 func injectEnvironment(b *buildv1alpha1.Build, rawEnv map[string]string) { 531 for i := range b.Spec.Steps { // Inject environment variables to each step 532 defaultEnv(&b.Spec.Steps[i], rawEnv) 533 } 534 if b.Spec.Template != nil { // Also add it as template arguments 535 defaultArguments(b.Spec.Template, rawEnv) 536 } 537 } 538 539 func workDir(refs prowjobv1.Refs) buildv1alpha1.ArgumentSpec { 540 // workspaceMountName is auto-injected into each step at workspaceMountPath 541 return buildv1alpha1.ArgumentSpec{Name: "WORKDIR", Value: clone.PathForRefs(workspaceMountPath, refs)} 542 } 543 544 // injectSource adds the custom source container to call clonerefs correctly. 545 // 546 // Does nothing if the build spec predefines Source 547 func injectSource(b *buildv1alpha1.Build, pj prowjobv1.ProwJob) error { 548 if b.Spec.Source != nil { 549 return nil 550 } 551 srcContainer, refs, cloneVolumes, err := decorate.CloneRefs(pj, codeMount, logMount) 552 if err != nil { 553 return fmt.Errorf("clone source error: %v", err) 554 } 555 if srcContainer == nil { 556 return nil 557 } else { 558 srcContainer.Name = "" // knative-build requirement 559 } 560 561 b.Spec.Source = &buildv1alpha1.SourceSpec{ 562 Custom: srcContainer, 563 } 564 b.Spec.Volumes = append(b.Spec.Volumes, cloneVolumes...) 565 566 wd := workDir(refs[0]) 567 // Inject correct working directory 568 for i := range b.Spec.Steps { 569 if b.Spec.Steps[i].WorkingDir != "" { 570 continue 571 } 572 b.Spec.Steps[i].WorkingDir = wd.Value 573 } 574 if b.Spec.Template != nil { 575 // Best we can do for a template is to set WORKDIR 576 b.Spec.Template.Arguments = append(b.Spec.Template.Arguments, wd) 577 } 578 579 return nil 580 } 581 582 // makeBuild creates a build from the prowjob, using the prowjob's buildspec. 583 func makeBuild(pj prowjobv1.ProwJob, buildID string) (*buildv1alpha1.Build, error) { 584 if pj.Spec.BuildSpec == nil { 585 return nil, errors.New("nil BuildSpec") 586 } 587 b := buildv1alpha1.Build{ 588 ObjectMeta: buildMeta(pj), 589 Spec: *pj.Spec.BuildSpec, 590 } 591 rawEnv, err := buildEnv(pj, buildID) 592 if err != nil { 593 return nil, fmt.Errorf("environment error: %v", err) 594 } 595 injectEnvironment(&b, rawEnv) 596 err = injectSource(&b, pj) 597 598 return &b, nil 599 }