github.com/tilt-dev/tilt@v0.33.15-0.20240515162809-0a22ed45d8a0/internal/controllers/core/liveupdate/reconciler.go

package liveupdate

import (
	"context"
	"errors"
	"fmt"
	"sync"
	"time"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/types"
	"sigs.k8s.io/controller-runtime/pkg/builder"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/handler"

	apierrors "k8s.io/apimachinery/pkg/api/errors"
	ctrl "sigs.k8s.io/controller-runtime"
	ctrlclient "sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/reconcile"

	"github.com/tilt-dev/tilt/internal/build"
	"github.com/tilt-dev/tilt/internal/container"
	"github.com/tilt-dev/tilt/internal/containerupdate"
	"github.com/tilt-dev/tilt/internal/controllers/apicmp"
	"github.com/tilt-dev/tilt/internal/controllers/apis/configmap"
	"github.com/tilt-dev/tilt/internal/controllers/apis/liveupdate"
	"github.com/tilt-dev/tilt/internal/controllers/indexer"
	"github.com/tilt-dev/tilt/internal/k8s"
	"github.com/tilt-dev/tilt/internal/ospath"
	"github.com/tilt-dev/tilt/internal/sliceutils"
	"github.com/tilt-dev/tilt/internal/store"
	"github.com/tilt-dev/tilt/internal/store/buildcontrols"
	"github.com/tilt-dev/tilt/internal/store/k8sconv"
	"github.com/tilt-dev/tilt/internal/store/liveupdates"
	"github.com/tilt-dev/tilt/pkg/apis"
	"github.com/tilt-dev/tilt/pkg/apis/core/v1alpha1"
	"github.com/tilt-dev/tilt/pkg/logger"
	"github.com/tilt-dev/tilt/pkg/model"
	"github.com/tilt-dev/tilt/pkg/model/logstore"
)

const LiveUpdateSource = "liveupdate"

var discoveryGVK = v1alpha1.SchemeGroupVersion.WithKind("KubernetesDiscovery")
var dcsGVK = v1alpha1.SchemeGroupVersion.WithKind("DockerComposeService")
var applyGVK = v1alpha1.SchemeGroupVersion.WithKind("KubernetesApply")
var fwGVK = v1alpha1.SchemeGroupVersion.WithKind("FileWatch")
var imageMapGVK = v1alpha1.SchemeGroupVersion.WithKind("ImageMap")

var reasonObjectNotFound = "ObjectNotFound"

// Manages the LiveUpdate API object.
type Reconciler struct {
	client  ctrlclient.Client
	indexer *indexer.Indexer
	store   store.RStore

	ExecUpdater   containerupdate.ContainerUpdater
	DockerUpdater containerupdate.ContainerUpdater
	updateMode    liveupdates.UpdateMode
	kubeContext   k8s.KubeContext
	startedTime   metav1.MicroTime

	monitors map[string]*monitor

	// We need to be able to map trigger events to known resources while
	// Reconcile() is running.
	mu sync.Mutex
}

var _ reconcile.Reconciler = &Reconciler{}

// Dependency-inject a live update reconciler.
func NewReconciler(
	st store.RStore,
	dcu *containerupdate.DockerUpdater,
	ecu *containerupdate.ExecUpdater,
	updateMode liveupdates.UpdateMode,
	kubeContext k8s.KubeContext,
	client ctrlclient.Client,
	scheme *runtime.Scheme) *Reconciler {
	return &Reconciler{
		DockerUpdater: dcu,
		ExecUpdater:   ecu,
		updateMode:    updateMode,
		kubeContext:   kubeContext,
		client:        client,
		indexer:       indexer.NewIndexer(scheme, indexLiveUpdate),
		store:         st,
		startedTime:   apis.NowMicro(),
		monitors:      make(map[string]*monitor),
	}
}

// Create a reconciler backed by a fake ContainerUpdater and Client.
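// Intended for tests: the same fake updater backs both the Docker and exec
// update paths, and the kube context is a placeholder.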
func NewFakeReconciler(
	st store.RStore,
	cu containerupdate.ContainerUpdater,
	client ctrlclient.Client) *Reconciler {
	scheme := v1alpha1.NewScheme()
	return &Reconciler{
		DockerUpdater: cu,
		ExecUpdater:   cu,
		updateMode:    liveupdates.UpdateModeAuto,
		kubeContext:   k8s.KubeContext("fake-context"),
		client:        client,
		indexer:       indexer.NewIndexer(scheme, indexLiveUpdate),
		store:         st,
		startedTime:   apis.NowMicro(),
		monitors:      make(map[string]*monitor),
	}
}

func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
	r.mu.Lock()
	defer r.mu.Unlock()

	lu := &v1alpha1.LiveUpdate{}
	err := r.client.Get(ctx, req.NamespacedName, lu)
	r.indexer.OnReconcile(req.NamespacedName, lu)
	if err != nil && !apierrors.IsNotFound(err) {
		return ctrl.Result{}, fmt.Errorf("liveupdate reconcile: %v", err)
	}

	if apierrors.IsNotFound(err) || lu.ObjectMeta.DeletionTimestamp != nil {
		r.store.Dispatch(liveupdates.NewLiveUpdateDeleteAction(req.Name))
		delete(r.monitors, req.Name)
		return ctrl.Result{}, nil
	}

	// The apiserver is the source of truth, and will ensure the engine state is up to date.
	r.store.Dispatch(liveupdates.NewLiveUpdateUpsertAction(lu))

	ctx = store.MustObjectLogHandler(ctx, r.store, lu)

	if lu.Annotations[v1alpha1.AnnotationManagedBy] != "" {
		// A LiveUpdate can't be managed by the reconciler until all the objects
		// it depends on are managed by the reconciler. The Tiltfile controller
		// is responsible for marking objects that we want to manage with ForceApply().
		return ctrl.Result{}, nil
	}

	invalidSelectorFailedState := r.ensureSelectorValid(lu)
	if invalidSelectorFailedState != nil {
		return r.handleFailure(ctx, lu, invalidSelectorFailedState)
	}

	monitor := r.ensureMonitorExists(lu.Name, lu)
	hasFileChanges, err := r.reconcileSources(ctx, monitor)
	if err != nil {
		if apierrors.IsNotFound(err) {
			return r.handleFailure(ctx, lu, createFailedState(lu, reasonObjectNotFound, err.Error()))
		}
		return ctrl.Result{}, err
	}

	hasKubernetesChanges, err := r.reconcileKubernetesResource(ctx, monitor)
	if err != nil {
		if apierrors.IsNotFound(err) {
			return r.handleFailure(ctx, lu, createFailedState(lu, reasonObjectNotFound, err.Error()))
		}
		return ctrl.Result{}, err
	}

	hasDockerComposeChanges, err := r.reconcileDockerComposeService(ctx, monitor)
	if err != nil {
		if apierrors.IsNotFound(err) {
			return r.handleFailure(ctx, lu, createFailedState(lu, reasonObjectNotFound, err.Error()))
		}
		return ctrl.Result{}, err
	}

	hasTriggerQueueChanges, err := r.reconcileTriggerQueue(ctx, monitor)
	if err != nil {
		return ctrl.Result{}, err
	}

	if hasFileChanges || hasKubernetesChanges || hasDockerComposeChanges || hasTriggerQueueChanges {
		monitor.hasChangesToSync = true
	}

	if monitor.hasChangesToSync {
		status := r.maybeSync(ctx, lu, monitor)
		if status.Failed != nil {
			// Log any new failures.
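			// A failure only counts as "new" if it differs from the failure already
			// recorded on the status, so repeated reconciles don't re-log the same error.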
			isNew := lu.Status.Failed == nil || !apicmp.DeepEqual(lu.Status.Failed, status.Failed)
			if isNew && r.shouldLogFailureReason(status.Failed) {
				logger.Get(ctx).Infof("LiveUpdate %q %s: %v", lu.Name, status.Failed.Reason, status.Failed.Message)
			}
		}

		if !apicmp.DeepEqual(lu.Status, status) {
			update := lu.DeepCopy()
			update.Status = status

			err := r.client.Status().Update(ctx, update)
			if err != nil {
				return ctrl.Result{}, err
			}
		}
	}

	monitor.hasChangesToSync = false

	return ctrl.Result{}, nil
}

func (r *Reconciler) shouldLogFailureReason(obj *v1alpha1.LiveUpdateStateFailed) bool {
	// ObjectNotFound errors are normal before the Apply has created the KubernetesDiscovery object.
	return obj.Reason != reasonObjectNotFound
}

// Check for some invalid states.
func (r *Reconciler) ensureSelectorValid(lu *v1alpha1.LiveUpdate) *v1alpha1.LiveUpdateStateFailed {
	selector := lu.Spec.Selector
	if selector.Kubernetes != nil {
		if selector.Kubernetes.DiscoveryName == "" {
			return createFailedState(lu, "Invalid", "Kubernetes selector requires DiscoveryName")
		}
		return nil
	}
	if selector.DockerCompose != nil {
		if selector.DockerCompose.Service == "" {
			return createFailedState(lu, "Invalid", "DockerCompose selector requires Service")
		}
		return nil
	}
	return createFailedState(lu, "Invalid", "No valid selector")
}

// If the failure state has changed, log it and write it to the apiserver.
func (r *Reconciler) handleFailure(ctx context.Context, lu *v1alpha1.LiveUpdate, failed *v1alpha1.LiveUpdateStateFailed) (ctrl.Result, error) {
	isNew := lu.Status.Failed == nil || !apicmp.DeepEqual(lu.Status.Failed, failed)
	if !isNew {
		return ctrl.Result{}, nil
	}

	if r.shouldLogFailureReason(failed) {
		logger.Get(ctx).Infof("LiveUpdate %q %s: %v", lu.Name, failed.Reason, failed.Message)
	}

	update := lu.DeepCopy()
	update.Status.Failed = failed

	err := r.client.Status().Update(ctx, update)

	return ctrl.Result{}, err
}

// Create the monitor that tracks a live update. If the live update
// spec changes, wipe out all accumulated state.
func (r *Reconciler) ensureMonitorExists(name string, obj *v1alpha1.LiveUpdate) *monitor {
	spec := obj.Spec
	m, ok := r.monitors[name]
	if ok && apicmp.DeepEqual(obj.Spec, m.spec) {
		return m
	}

	m = &monitor{
		manifestName: obj.Annotations[v1alpha1.AnnotationManifest],
		spec:         spec,
		sources:      make(map[string]*monitorSource),
		containers:   make(map[monitorContainerKey]monitorContainerStatus),
	}
	r.monitors[name] = m
	return m
}

// Consume all FileEvents off the FileWatch objects.
// Returns true if we saw new file events.
//
// TODO(nick): Currently, it's entirely possible to miss file events. This has
// always been true (since operating systems themselves put limits on the event
// queue.) But it gets worse in a world where we read FileEvents from the API,
// since the FileWatch API itself adds lower limits.
//
// Long-term, we ought to have some way to reconnect/resync like other
// sync systems do (syncthing/rsync). e.g., diff the two file systems
// and update based on changes. But it also might make more sense to switch to a
// different library for syncing (e.g., Mutagen) now that live updates
// are decoupled from other file event-triggered tasks.
//
// In the meantime, Milas+Nick should figure out a way to handle this
// better in the short term.
func (r *Reconciler) reconcileSources(ctx context.Context, monitor *monitor) (bool, error) {
	if len(monitor.spec.Sources) == 0 {
		return false, nil
	}

	hasChange := false
	for _, s := range monitor.spec.Sources {
		oneChange, err := r.reconcileOneSource(ctx, monitor, s)
		if err != nil {
			return false, err
		}
		if oneChange {
			hasChange = true
		}
	}
	return hasChange, nil
}

// Consume one Source object.
func (r *Reconciler) reconcileOneSource(ctx context.Context, monitor *monitor, source v1alpha1.LiveUpdateSource) (bool, error) {
	fwn := source.FileWatch
	imn := source.ImageMap

	var fw v1alpha1.FileWatch
	if fwn != "" {
		err := r.client.Get(ctx, types.NamespacedName{Name: fwn}, &fw)
		if err != nil {
			return false, err
		}
	}

	var im v1alpha1.ImageMap
	if imn != "" {
		err := r.client.Get(ctx, types.NamespacedName{Name: imn}, &im)
		if err != nil {
			return false, err
		}
	}

	events := fw.Status.FileEvents
	if len(events) == 0 || fwn == "" {
		return false, nil
	}

	mSource, ok := monitor.sources[fwn]
	if !ok {
		mSource = &monitorSource{
			modTimeByPath: make(map[string]metav1.MicroTime),
		}
		monitor.sources[fwn] = mSource
	}

	newImageStatus := im.Status
	imageChanged := false
	if imn != "" {
		imageChanged = mSource.lastImageStatus == nil ||
			!apicmp.DeepEqual(&newImageStatus, mSource.lastImageStatus)
		mSource.lastImageStatus = &im.Status
	}

	newLastFileEvent := events[len(events)-1]
	event := mSource.lastFileEvent
	fileWatchChanged := event == nil || !apicmp.DeepEqual(&newLastFileEvent, event)
	mSource.lastFileEvent = &newLastFileEvent

	if fileWatchChanged {
		// Consume all the file events.
		for _, event := range events {
			eventTime := event.Time.Time
			if newImageStatus.BuildStartTime != nil && newImageStatus.BuildStartTime.After(eventTime) {
				continue
			}

			for _, f := range event.SeenFiles {
				existing, ok := mSource.modTimeByPath[f]
				if !ok || existing.Time.Before(event.Time.Time) {
					mSource.modTimeByPath[f] = event.Time
				}
			}
		}
	}

	return fileWatchChanged || imageChanged, nil
}

// Consume the TriggerQueue.
// This isn't formally represented in the API right now, it's just
// a ConfigMap to pull attributes off of.
// Returns true if we saw any changes.
func (r *Reconciler) reconcileTriggerQueue(ctx context.Context, monitor *monitor) (bool, error) {
	queue, err := configmap.TriggerQueue(ctx, r.client)
	if err != nil {
		return false, client.IgnoreNotFound(err)
	}

	if monitor.lastTriggerQueue != nil && apicmp.DeepEqual(queue.Data, monitor.lastTriggerQueue.Data) {
		return false, nil
	}

	monitor.lastTriggerQueue = queue
	return true, nil
}

// Consume all objects off the KubernetesSelector.
// Returns true if we saw any changes to the objects we're watching.
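//
// The KubernetesApply status, the KubernetesDiscovery status, and (when named)
// the ImageMap are each compared against the copies recorded on the monitor;
// a difference in any of them counts as a change.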
func (r *Reconciler) reconcileKubernetesResource(ctx context.Context, monitor *monitor) (bool, error) {
	selector := monitor.spec.Selector.Kubernetes
	if selector == nil {
		return false, nil
	}

	var kd *v1alpha1.KubernetesDiscovery
	var ka *v1alpha1.KubernetesApply
	var im *v1alpha1.ImageMap
	changed := false
	if selector.ApplyName != "" {
		ka = &v1alpha1.KubernetesApply{}
		err := r.client.Get(ctx, types.NamespacedName{Name: selector.ApplyName}, ka)
		if err != nil {
			return false, err
		}

		if monitor.lastKubernetesApplyStatus == nil ||
			!apicmp.DeepEqual(monitor.lastKubernetesApplyStatus, &(ka.Status)) {
			changed = true
		}
	}

	kd = &v1alpha1.KubernetesDiscovery{}
	err := r.client.Get(ctx, types.NamespacedName{Name: selector.DiscoveryName}, kd)
	if err != nil {
		return false, err
	}

	if selector.ImageMapName != "" {
		im = &v1alpha1.ImageMap{}
		if err := r.client.Get(ctx, types.NamespacedName{Name: selector.ImageMapName}, im); err != nil {
			return false, err
		}

		if monitor.lastImageMap == nil || !apicmp.DeepEqual(monitor.lastImageMap, im) {
			changed = true
		}
	}

	if monitor.lastKubernetesDiscovery == nil ||
		!apicmp.DeepEqual(monitor.lastKubernetesDiscovery.Status, kd.Status) {
		changed = true
	}

	if ka == nil {
		monitor.lastKubernetesApplyStatus = nil
	} else {
		monitor.lastKubernetesApplyStatus = &(ka.Status)
	}

	monitor.lastKubernetesDiscovery = kd
	monitor.lastImageMap = im

	return changed, nil
}

// Consume all objects off the DockerComposeSelector.
// Returns true if we saw any changes to the objects we're watching.
func (r *Reconciler) reconcileDockerComposeService(ctx context.Context, monitor *monitor) (bool, error) {
	selector := monitor.spec.Selector.DockerCompose
	if selector == nil {
		return false, nil
	}

	var dcs v1alpha1.DockerComposeService
	err := r.client.Get(ctx, types.NamespacedName{Name: selector.Service}, &dcs)
	if err != nil {
		return false, err
	}

	changed := false
	if monitor.lastDockerComposeService == nil ||
		!apicmp.DeepEqual(monitor.lastDockerComposeService.Status, dcs.Status) {
		changed = true
	}

	monitor.lastDockerComposeService = &dcs

	return changed, nil
}

// Go through all the file changes, and delete files that aren't relevant
// to the current build.
//
// Determining the current build is a bit tricky, but our
// order of preference is:
// 1. If we have an ImageMap.BuildStartedAt, this is the gold standard.
// 2. If there's no ImageMap, we prefer the KubernetesApply.LastApplyStartTime.
// 3. If there's no KubernetesApply, we prefer the oldest pod
//    in the filtered pod list.
func (r *Reconciler) garbageCollectFileChanges(res luResource, monitor *monitor) {
	for _, source := range monitor.spec.Sources {
		fwn := source.FileWatch
		mSource, ok := monitor.sources[fwn]
		if !ok {
			continue
		}

		lastImageStatus := mSource.lastImageStatus
		var gcTime time.Time
		if lastImageStatus != nil && lastImageStatus.BuildStartTime != nil {
			gcTime = lastImageStatus.BuildStartTime.Time
		} else {
			gcTime = res.bestStartTime()
		}

		if !gcTime.IsZero() {
			// Delete all file events that happened before the
			// latest build started.
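			// modTimeByPath is the pending-change set: anything modified before
			// gcTime is assumed to be covered by that build and can be dropped.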
			for p, t := range mSource.modTimeByPath {
				if gcTime.After(t.Time) {
					delete(mSource.modTimeByPath, p)
				}
			}

			// Delete all failures that happened before the
			// latest build started.
			//
			// This mechanism isn't perfect - for example, it will start resyncing
			// again to a container that's going to be replaced by the current
			// build. But we also can't determine if a container is going to be
			// replaced or not (particularly if the image didn't change).
			for key, c := range monitor.containers {
				if !c.failedLowWaterMark.IsZero() && gcTime.After(c.failedLowWaterMark.Time) {
					c.failedLowWaterMark = metav1.MicroTime{}
					c.failedReason = ""
					c.failedMessage = ""
					monitor.containers[key] = c
				}
			}
		}
	}
}

// Go through all the container monitors, and delete any that are no longer
// being selected. We don't care why they're not being selected.
func (r *Reconciler) garbageCollectMonitorContainers(res luResource, monitor *monitor) {
	// All containers are guaranteed to have container IDs if they're still active.
	containerIDs := map[string]bool{}
	res.visitSelectedContainers(func(pod v1alpha1.Pod, c v1alpha1.Container) bool {
		if c.ID != "" {
			containerIDs[c.ID] = true
		}
		return false
	})

	for key := range monitor.containers {
		if !containerIDs[key.containerID] {
			delete(monitor.containers, key)
		}
	}
}

func (r *Reconciler) dispatchStartBuildAction(ctx context.Context, lu *v1alpha1.LiveUpdate, filesChanged []string) {
	manifestName := lu.Annotations[v1alpha1.AnnotationManifest]
	spanID := lu.Annotations[v1alpha1.AnnotationSpanID]
	r.store.Dispatch(buildcontrols.BuildStartedAction{
		ManifestName:       model.ManifestName(manifestName),
		StartTime:          time.Now(),
		FilesChanged:       filesChanged,
		Reason:             model.BuildReasonFlagChangedFiles,
		SpanID:             logstore.SpanID(spanID),
		FullBuildTriggered: false,
		Source:             LiveUpdateSource,
	})

	buildcontrols.LogBuildEntry(ctx, buildcontrols.BuildEntry{
		Name:         model.ManifestName(manifestName),
		BuildReason:  model.BuildReasonFlagChangedFiles,
		FilesChanged: filesChanged,
	})
}

func (r *Reconciler) dispatchCompleteBuildAction(lu *v1alpha1.LiveUpdate, newStatus v1alpha1.LiveUpdateStatus) {
	manifestName := model.ManifestName(lu.Annotations[v1alpha1.AnnotationManifest])
	spanID := logstore.SpanID(lu.Annotations[v1alpha1.AnnotationSpanID])
	var err error
	if newStatus.Failed != nil {
		err = errors.New(newStatus.Failed.Message)
	} else {
		for _, c := range newStatus.Containers {
			if c.LastExecError != "" {
				err = errors.New(c.LastExecError)
				break
			}
		}
	}

	resultSet := store.BuildResultSet{}
	r.store.Dispatch(buildcontrols.NewBuildCompleteAction(manifestName, LiveUpdateSource, spanID, resultSet, err))
}

func (r *Reconciler) resource(lu *v1alpha1.LiveUpdate, monitor *monitor) (luResource, error) {
	k := lu.Spec.Selector.Kubernetes
	if k != nil {
		r, err := k8sconv.NewKubernetesResource(monitor.lastKubernetesDiscovery, monitor.lastKubernetesApplyStatus)
		if err != nil || r == nil {
			return nil, fmt.Errorf("creating kube resource: %v", err)
		}
		return &luK8sResource{
			selector: k,
			res:      r,
			im:       monitor.lastImageMap,
		}, nil
	}
	dc := lu.Spec.Selector.DockerCompose
	if dc != nil {
		if monitor.lastDockerComposeService == nil {
			return nil, fmt.Errorf("no docker compose status")
status") 602 } 603 return &luDCResource{ 604 selector: dc, 605 res: monitor.lastDockerComposeService, 606 }, nil 607 } 608 return nil, fmt.Errorf("No valid selector") 609 } 610 611 // Convert the currently tracked state into a set of inputs 612 // to the updater, then apply them. 613 func (r *Reconciler) maybeSync(ctx context.Context, lu *v1alpha1.LiveUpdate, monitor *monitor) v1alpha1.LiveUpdateStatus { 614 var status v1alpha1.LiveUpdateStatus 615 resource, err := r.resource(lu, monitor) 616 if err != nil { 617 status.Failed = createFailedState(lu, "Invalid", err.Error()) 618 return status 619 } 620 621 manifestName := lu.Annotations[v1alpha1.AnnotationManifest] 622 updateMode := lu.Annotations[liveupdate.AnnotationUpdateMode] 623 inTriggerQueue := monitor.lastTriggerQueue != nil && manifestName != "" && 624 configmap.InTriggerQueue(monitor.lastTriggerQueue, types.NamespacedName{Name: manifestName}) 625 isUpdateModeManual := updateMode == liveupdate.UpdateModeManual 626 isWaitingOnTrigger := false 627 if isUpdateModeManual && !inTriggerQueue { 628 // In manual mode, we should always wait for a trigger before live updating anything. 629 isWaitingOnTrigger = true 630 } 631 632 r.garbageCollectFileChanges(resource, monitor) 633 r.garbageCollectMonitorContainers(resource, monitor) 634 635 // Go through all the container monitors, and check if any of them are unrecoverable. 636 // If they are, it's not important to figure out why. 637 resource.visitSelectedContainers(func(pod v1alpha1.Pod, c v1alpha1.Container) bool { 638 cKey := monitorContainerKey{ 639 containerID: c.ID, 640 podName: pod.Name, 641 namespace: pod.Namespace, 642 } 643 644 cStatus, ok := monitor.containers[cKey] 645 if ok && cStatus.failedReason != "" { 646 status.Failed = createFailedState(lu, cStatus.failedReason, cStatus.failedMessage) 647 return true 648 } 649 return false 650 }) 651 652 if status.Failed != nil { 653 return status 654 } 655 656 updateEventDispatched := false 657 658 // Visit all containers, apply changes, and return their statuses. 659 terminatedContainerPodName := "" 660 hasAnyFilesToSync := false 661 resource.visitSelectedContainers(func(pod v1alpha1.Pod, cInfo v1alpha1.Container) bool { 662 c := liveupdates.Container{ 663 ContainerID: container.ID(cInfo.ID), 664 ContainerName: container.Name(cInfo.Name), 665 PodID: k8s.PodID(pod.Name), 666 Namespace: k8s.Namespace(pod.Namespace), 667 } 668 cKey := monitorContainerKey{ 669 containerID: cInfo.ID, 670 podName: pod.Name, 671 namespace: pod.Namespace, 672 } 673 674 highWaterMark := r.startedTime 675 cStatus, ok := monitor.containers[cKey] 676 if ok && !cStatus.lastFileTimeSynced.IsZero() { 677 highWaterMark = cStatus.lastFileTimeSynced 678 } 679 680 // Determine the changed files. 681 filesChanged := []string{} 682 newHighWaterMark := highWaterMark 683 newLowWaterMark := metav1.MicroTime{} 684 for _, source := range monitor.sources { 685 for f, t := range source.modTimeByPath { 686 if t.After(highWaterMark.Time) { 687 filesChanged = append(filesChanged, f) 688 689 if newLowWaterMark.IsZero() || t.Before(&newLowWaterMark) { 690 newLowWaterMark = t 691 } 692 693 if t.After(newHighWaterMark.Time) { 694 newHighWaterMark = t 695 } 696 } 697 } 698 } 699 700 // Sort the files so that they're deterministic. 701 filesChanged = sliceutils.DedupedAndSorted(filesChanged) 702 if len(filesChanged) > 0 { 703 hasAnyFilesToSync = true 704 } 705 706 // Ignore completed pods/containers. 
		// This is a bit tricky to handle correctly, but is handled at
		// the end of this function.
		if pod.Phase == string(v1.PodSucceeded) || pod.Phase == string(v1.PodFailed) || cInfo.State.Terminated != nil {
			if terminatedContainerPodName == "" {
				terminatedContainerPodName = pod.Name
			}
			return false
		}

		var waiting *v1alpha1.LiveUpdateContainerStateWaiting

		// We interpret "no container id" as a waiting state
		// (terminated states should have been caught above).
		if cInfo.State.Running == nil || cInfo.ID == "" {
			waiting = &v1alpha1.LiveUpdateContainerStateWaiting{
				Reason:  "ContainerWaiting",
				Message: "Waiting for container to start",
			}
		} else if isWaitingOnTrigger {
			waiting = &v1alpha1.LiveUpdateContainerStateWaiting{
				Reason:  "Trigger",
				Message: "Only updates on manual trigger",
			}
		}

		// Create a plan to update the container.
		filesApplied := false
		var oneUpdateStatus v1alpha1.LiveUpdateStatus
		plan, failed := r.createLiveUpdatePlan(lu.Spec, filesChanged)
		if failed != nil {
			// The plan told us to stop updating - this container is unrecoverable.
			oneUpdateStatus.Failed = failed
		} else if len(plan.SyncPaths) == 0 {
			// The plan told us that there are no updates to do.
			oneUpdateStatus.Containers = []v1alpha1.LiveUpdateContainerStatus{{
				ContainerName:      cInfo.Name,
				ContainerID:        cInfo.ID,
				PodName:            pod.Name,
				Namespace:          pod.Namespace,
				LastFileTimeSynced: cStatus.lastFileTimeSynced,
				Waiting:            waiting,
			}}
		} else if cInfo.State.Waiting != nil && cInfo.State.Waiting.Reason == "CrashLoopBackOff" {
			// At this point, the plan told us that we have some files to sync.
			// Check if the container is in a state to receive those updates.

			// If the container is crashlooping, that means it might not be up long enough
			// to be able to receive a live-update. Treat this as an unrecoverable failure case.
			oneUpdateStatus.Failed = createFailedState(lu, "CrashLoopBackOff",
				fmt.Sprintf("Cannot live update because container crashing. Pod: %s", pod.Name))

		} else if waiting != nil {
			// Mark the container as waiting, so we have a record of it. No need to sync any files.
			oneUpdateStatus.Containers = []v1alpha1.LiveUpdateContainerStatus{{
				ContainerName:      cInfo.Name,
				ContainerID:        cInfo.ID,
				PodName:            pod.Name,
				Namespace:          pod.Namespace,
				LastFileTimeSynced: cStatus.lastFileTimeSynced,
				Waiting:            waiting,
			}}
		} else {
			// Log progress and treat this as an update in the engine state.
			if !updateEventDispatched {
				updateEventDispatched = true
				r.dispatchStartBuildAction(ctx, lu, filesChanged)
			}

			// Apply the change to the container.
			oneUpdateStatus = r.applyInternal(ctx, lu.Spec, Input{
				IsDC:               lu.Spec.Selector.DockerCompose != nil,
				ChangedFiles:       plan.SyncPaths,
				Containers:         []liveupdates.Container{c},
				LastFileTimeSynced: newHighWaterMark,
			})
			filesApplied = true
		}

		// Merge the status from the single update into the overall liveupdate status.
		adjustFailedStateTimestamps(lu, &oneUpdateStatus)

		// Update the monitor based on the result of the applied changes.
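		// A failure is remembered per container together with the low-water mark of
		// the files that caused it; garbageCollectFileChanges clears it once a newer
		// build starts, which is what allows syncing to resume.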
		if oneUpdateStatus.Failed != nil {
			cStatus.failedReason = oneUpdateStatus.Failed.Reason
			cStatus.failedMessage = oneUpdateStatus.Failed.Message
			cStatus.failedLowWaterMark = newLowWaterMark
		} else if filesApplied {
			cStatus.lastFileTimeSynced = newHighWaterMark
		}
		monitor.containers[cKey] = cStatus

		// Update the status based on the result of the applied changes.
		if oneUpdateStatus.Failed != nil {
			status.Failed = oneUpdateStatus.Failed
			status.Containers = nil
			return true
		}

		status.Containers = append(status.Containers, oneUpdateStatus.Containers...)
		return false
	})

	// If the only containers we're connected to are terminated containers,
	// there are two cases we need to worry about:
	//
	// 1) The pod has completed, and will never run again (like a Job).
	// 2) This is an old pod, and we're waiting for the new pod to rollout.
	//
	// We don't really have a great way to distinguish between these two cases.
	//
	// If we get to the end of this loop and haven't found any "live" pods,
	// we assume we're in state (1) (to prevent waiting forever).
	if status.Failed == nil && terminatedContainerPodName != "" &&
		hasAnyFilesToSync && len(status.Containers) == 0 {
		status.Failed = createFailedState(lu, "Terminated",
			fmt.Sprintf("Container for live update is stopped. Pod name: %s", terminatedContainerPodName))
	}

	if updateEventDispatched {
		r.dispatchCompleteBuildAction(lu, status)
	}

	return status
}

func (r *Reconciler) createLiveUpdatePlan(spec v1alpha1.LiveUpdateSpec, filesChanged []string) (liveupdates.LiveUpdatePlan, *v1alpha1.LiveUpdateStateFailed) {
	plan, err := liveupdates.NewLiveUpdatePlan(spec, filesChanged)
	if err != nil {
		return plan, &v1alpha1.LiveUpdateStateFailed{
			Reason:  "UpdateStopped",
			Message: fmt.Sprintf("No update plan: %v", err),
		}
	}

	if len(plan.NoMatchPaths) > 0 {
		return plan, &v1alpha1.LiveUpdateStateFailed{
			Reason: "UpdateStopped",
			Message: fmt.Sprintf("Found file(s) not matching any sync (files: %s)",
				ospath.FormatFileChangeList(plan.NoMatchPaths)),
		}
	}

	// If any changed files match a FallBackOn file, fall back to next BuildAndDeployer
	if len(plan.StopPaths) != 0 {
		return plan, &v1alpha1.LiveUpdateStateFailed{
			Reason:  "UpdateStopped",
			Message: fmt.Sprintf("Detected change to stop file %q", plan.StopPaths[0]),
		}
	}
	return plan, nil
}

// Generate the correct transition time on the Failed state.
func adjustFailedStateTimestamps(obj *v1alpha1.LiveUpdate, newStatus *v1alpha1.LiveUpdateStatus) {
	if newStatus.Failed == nil {
		return
	}

	newStatus.Failed = createFailedState(obj, newStatus.Failed.Reason, newStatus.Failed.Message)
}

// Create a new failed state and update the transition timestamp if appropriate.
func createFailedState(obj *v1alpha1.LiveUpdate, reason, msg string) *v1alpha1.LiveUpdateStateFailed {
	failed := &v1alpha1.LiveUpdateStateFailed{Reason: reason, Message: msg}
	transitionTime := apis.NowMicro()
	if obj.Status.Failed != nil && obj.Status.Failed.Reason == failed.Reason {
		// If the reason hasn't changed, don't treat this as a transition.
		transitionTime = obj.Status.Failed.LastTransitionTime
	}

	failed.LastTransitionTime = transitionTime
	return failed
}

// Like apply, but doesn't write the status to the apiserver.
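//
// For each container, this boils the run steps against the changed files,
// splits the paths into deletions vs. a tar archive of copies, and streams
// both through the ContainerUpdater picked by containerUpdater().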
func (r *Reconciler) applyInternal(
	ctx context.Context,
	spec v1alpha1.LiveUpdateSpec,
	input Input) v1alpha1.LiveUpdateStatus {

	var result v1alpha1.LiveUpdateStatus
	cu := r.containerUpdater(input)
	l := logger.Get(ctx)
	containers := input.Containers
	names := liveupdates.ContainerDisplayNames(containers)
	suffix := ""
	if len(containers) != 1 {
		suffix = "(s)"
	}

	runSteps := liveupdate.RunSteps(spec)
	changedFiles := input.ChangedFiles
	hotReload := !liveupdate.ShouldRestart(spec)
	boiledSteps, err := build.BoilRuns(runSteps, changedFiles)
	if err != nil {
		result.Failed = &v1alpha1.LiveUpdateStateFailed{
			Reason:  "Invalid",
			Message: fmt.Sprintf("Building exec: %v", err),
		}
		return result
	}

	// rm files from container
	toRemove, toArchive, err := build.MissingLocalPaths(ctx, changedFiles)
	if err != nil {
		result.Failed = &v1alpha1.LiveUpdateStateFailed{
			Reason:  "Invalid",
			Message: fmt.Sprintf("Mapping paths: %v", err),
		}
		return result
	}

	if len(toRemove) > 0 {
		l.Infof("Will delete %d file(s) from container%s: %s", len(toRemove), suffix, names)
		for _, pm := range toRemove {
			l.Infof("- '%s' (matched local path: '%s')", pm.ContainerPath, pm.LocalPath)
		}
	}

	if len(toArchive) > 0 {
		l.Infof("Will copy %d file(s) to container%s: %s", len(toArchive), suffix, names)
		for _, pm := range toArchive {
			l.Infof("- %s", pm.PrettyStr())
		}
	}

	var lastExecErrorStatus *v1alpha1.LiveUpdateContainerStatus
	for _, cInfo := range containers {
		// TODO(nick): We should try to distinguish between cases where the tar writer
		// fails (which is recoverable) vs when the server-side unpacking
		// fails (which may not be recoverable).
		archive := build.TarArchiveForPaths(ctx, toArchive, nil)
		err = cu.UpdateContainer(ctx, cInfo, archive,
			build.PathMappingsToContainerPaths(toRemove), boiledSteps, hotReload)
		_ = archive.Close()

		lastFileTimeSynced := input.LastFileTimeSynced
		if lastFileTimeSynced.IsZero() {
			lastFileTimeSynced = apis.NowMicro()
		}

		cStatus := v1alpha1.LiveUpdateContainerStatus{
			ContainerName:      cInfo.ContainerName.String(),
			ContainerID:        cInfo.ContainerID.String(),
			PodName:            cInfo.PodID.String(),
			Namespace:          string(cInfo.Namespace),
			LastFileTimeSynced: lastFileTimeSynced,
		}

		if err != nil {
			if build.IsRunStepFailure(err) {
				// Keep running updates -- we want all containers to have the same files on them
				// even if the Runs don't succeed.
				logger.Get(ctx).Infof(" → Failed to update container %s: %v",
					cInfo.DisplayName(), err)
				cStatus.LastExecError = err.Error()
				lastExecErrorStatus = &cStatus
			} else {
				// Something went wrong with this update and it's NOT the user's fault --
				// likely an infrastructure error. Bail, and fall back to a full build.
				msg := ""
				if cStatus.PodName != "" {
					msg = fmt.Sprintf("Updating pod %s: %v", cStatus.PodName, err)
				} else {
					msg = fmt.Sprintf("Updating container %s: %v", cInfo.DisplayName(), err)
				}
				result.Failed = &v1alpha1.LiveUpdateStateFailed{
					Reason:  "UpdateFailed",
					Message: msg,
				}
				return result
			}
		} else {
			logger.Get(ctx).Infof(" → Container %s updated!", cInfo.DisplayName())
			if lastExecErrorStatus != nil {
				// This build succeeded, but previously at least one failed due to user error.
				// We may have inconsistent state -- bail, and fall back to a full build.
				result.Failed = &v1alpha1.LiveUpdateStateFailed{
					Reason: "PodsInconsistent",
					Message: fmt.Sprintf("Pods in inconsistent state. Success: pod %s. Failure: pod %s. Error: %v",
						cStatus.PodName, lastExecErrorStatus.PodName, lastExecErrorStatus.LastExecError),
				}
				return result
			}
		}

		result.Containers = append(result.Containers, cStatus)
	}
	return result
}

func (r *Reconciler) containerUpdater(input Input) containerupdate.ContainerUpdater {
	isDC := input.IsDC
	if isDC || r.updateMode == liveupdates.UpdateModeContainer {
		return r.DockerUpdater
	}

	if r.updateMode == liveupdates.UpdateModeKubectlExec {
		return r.ExecUpdater
	}

	dcu, ok := r.DockerUpdater.(*containerupdate.DockerUpdater)
	if ok && dcu.WillBuildToKubeContext(r.kubeContext) {
		return r.DockerUpdater
	}

	return r.ExecUpdater
}

func (r *Reconciler) CreateBuilder(mgr ctrl.Manager) (*builder.Builder, error) {
	b := ctrl.NewControllerManagedBy(mgr).
		For(&v1alpha1.LiveUpdate{}).
		Watches(&v1alpha1.KubernetesDiscovery{},
			handler.EnqueueRequestsFromMapFunc(r.indexer.Enqueue)).
		Watches(&v1alpha1.KubernetesApply{},
			handler.EnqueueRequestsFromMapFunc(r.indexer.Enqueue)).
		Watches(&v1alpha1.DockerComposeService{},
			handler.EnqueueRequestsFromMapFunc(r.indexer.Enqueue)).
		Watches(&v1alpha1.FileWatch{},
			handler.EnqueueRequestsFromMapFunc(r.indexer.Enqueue)).
		Watches(&v1alpha1.ImageMap{},
			handler.EnqueueRequestsFromMapFunc(r.indexer.Enqueue)).
		Watches(&v1alpha1.ConfigMap{},
			handler.EnqueueRequestsFromMapFunc(r.enqueueTriggerQueue))

	return b, nil
}

// Find any objects we need to reconcile based on the trigger queue.
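//
// The trigger queue is a ConfigMap of manifest names; each entry is mapped
// back to a LiveUpdate name through the monitors this reconciler already tracks.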
func (r *Reconciler) enqueueTriggerQueue(ctx context.Context, obj client.Object) []reconcile.Request {
	cm, ok := obj.(*v1alpha1.ConfigMap)
	if !ok {
		return nil
	}

	if cm.Name != configmap.TriggerQueueName {
		return nil
	}

	// We can only trigger liveupdates that have run once, so search
	// through the map of known liveupdates
	names := configmap.NamesInTriggerQueue(cm)
	nameSet := make(map[string]bool)
	for _, name := range names {
		nameSet[name] = true
	}

	r.mu.Lock()
	defer r.mu.Unlock()

	requests := []reconcile.Request{}
	for name, monitor := range r.monitors {
		if nameSet[monitor.manifestName] {
			requests = append(requests, reconcile.Request{NamespacedName: types.NamespacedName{Name: name}})
		}
	}
	return requests
}

// indexLiveUpdate returns keys of objects referenced _by_ the LiveUpdate object for reverse lookup including:
// - DockerComposeService
// - FileWatch
// - ImageMap
// - KubernetesDiscovery
// - KubernetesApply
func indexLiveUpdate(obj ctrlclient.Object) []indexer.Key {
	lu := obj.(*v1alpha1.LiveUpdate)
	var result []indexer.Key

	for _, s := range lu.Spec.Sources {
		fwn := s.FileWatch
		imn := s.ImageMap
		if fwn != "" {
			result = append(result, indexer.Key{
				Name: types.NamespacedName{
					Namespace: lu.Namespace,
					Name:      fwn,
				},
				GVK: fwGVK,
			})
		}

		if imn != "" {
			result = append(result, indexer.Key{
				Name: types.NamespacedName{
					Namespace: lu.Namespace,
					Name:      imn,
				},
				GVK: imageMapGVK,
			})
		}
	}

	if kSel := lu.Spec.Selector.Kubernetes; kSel != nil {
		if kSel.DiscoveryName != "" {
			result = append(result, indexer.Key{
				Name: types.NamespacedName{
					Namespace: lu.Namespace,
					Name:      kSel.DiscoveryName,
				},
				GVK: discoveryGVK,
			})
		}

		if kSel.ApplyName != "" {
			result = append(result, indexer.Key{
				Name: types.NamespacedName{
					Namespace: lu.Namespace,
					Name:      kSel.ApplyName,
				},
				GVK: applyGVK,
			})
		}

		if kSel.ImageMapName != "" {
			result = append(result, indexer.Key{
				Name: types.NamespacedName{
					Namespace: lu.Namespace,
					Name:      kSel.ImageMapName,
				},
				GVK: imageMapGVK,
			})
		}
	}
	if lu.Spec.Selector.DockerCompose != nil && lu.Spec.Selector.DockerCompose.Service != "" {
		result = append(result, indexer.Key{
			Name: types.NamespacedName{
				Namespace: lu.Namespace,
				Name:      lu.Spec.Selector.DockerCompose.Service,
			},
			GVK: dcsGVK,
		})
	}
	return result
}
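
// Wiring sketch (not from this file): CreateBuilder only returns the configured
// builder, so a caller still has to attach the reconciler. Assuming the caller
// holds a ctrl.Manager and a *Reconciler from NewReconciler, the hookup with
// plain controller-runtime would look roughly like this:
//
//	b, err := r.CreateBuilder(mgr)
//	if err != nil {
//		return err
//	}
//	// Complete registers the Reconciler so Reconcile() runs for every enqueued request.
//	return b.Complete(r)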