github.com/tilt-dev/tilt@v0.33.15-0.20240515162809-0a22ed45d8a0/internal/controllers/core/cluster/reconciler.go (about) 1 package cluster 2 3 import ( 4 "context" 5 "fmt" 6 "os" 7 "path/filepath" 8 "time" 9 10 "github.com/docker/docker/client" 11 "github.com/jonboulle/clockwork" 12 apierrors "k8s.io/apimachinery/pkg/api/errors" 13 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 14 "k8s.io/apimachinery/pkg/runtime/schema" 15 "k8s.io/apimachinery/pkg/types" 16 "k8s.io/cli-runtime/pkg/printers" 17 "k8s.io/client-go/tools/clientcmd/api" 18 "k8s.io/client-go/tools/clientcmd/api/latest" 19 ctrl "sigs.k8s.io/controller-runtime" 20 "sigs.k8s.io/controller-runtime/pkg/builder" 21 ctrlclient "sigs.k8s.io/controller-runtime/pkg/client" 22 "sigs.k8s.io/controller-runtime/pkg/reconcile" 23 24 "github.com/tilt-dev/tilt/internal/analytics" 25 "github.com/tilt-dev/tilt/internal/container" 26 "github.com/tilt-dev/tilt/internal/controllers/apicmp" 27 "github.com/tilt-dev/tilt/internal/controllers/indexer" 28 "github.com/tilt-dev/tilt/internal/docker" 29 "github.com/tilt-dev/tilt/internal/hud/server" 30 "github.com/tilt-dev/tilt/internal/k8s" 31 "github.com/tilt-dev/tilt/internal/store" 32 "github.com/tilt-dev/tilt/internal/store/clusters" 33 "github.com/tilt-dev/tilt/internal/xdg" 34 "github.com/tilt-dev/tilt/pkg/apis" 35 "github.com/tilt-dev/tilt/pkg/apis/core/v1alpha1" 36 "github.com/tilt-dev/tilt/pkg/logger" 37 "github.com/tilt-dev/tilt/pkg/model" 38 ) 39 40 const ArchUnknown string = "unknown" 41 42 const ( 43 clientInitBackoff = 30 * time.Second 44 clientHealthPollInterval = 15 * time.Second 45 ) 46 47 type Reconciler struct { 48 globalCtx context.Context 49 ctrlClient ctrlclient.Client 50 store store.RStore 51 requeuer *indexer.Requeuer 52 clock clockwork.Clock 53 connManager *ConnectionManager 54 base xdg.Base 55 apiServerName model.APIServerName 56 57 localDockerEnv docker.LocalEnv 58 dockerClientFactory DockerClientFactory 59 60 k8sClientFactory KubernetesClientFactory 61 wsList *server.WebsocketList 62 63 clusterHealth *clusterHealthMonitor 64 } 65 66 func (r *Reconciler) CreateBuilder(mgr ctrl.Manager) (*builder.Builder, error) { 67 b := ctrl.NewControllerManagedBy(mgr). 68 For(&v1alpha1.Cluster{}). 69 WatchesRawSource(r.requeuer) 70 return b, nil 71 } 72 73 func NewReconciler( 74 globalCtx context.Context, 75 ctrlClient ctrlclient.Client, 76 store store.RStore, 77 clock clockwork.Clock, 78 connManager *ConnectionManager, 79 localDockerEnv docker.LocalEnv, 80 dockerClientFactory DockerClientFactory, 81 k8sClientFactory KubernetesClientFactory, 82 wsList *server.WebsocketList, 83 base xdg.Base, 84 apiServerName model.APIServerName, 85 ) *Reconciler { 86 requeuer := indexer.NewRequeuer() 87 88 return &Reconciler{ 89 globalCtx: globalCtx, 90 ctrlClient: ctrlClient, 91 store: store, 92 clock: clock, 93 requeuer: requeuer, 94 connManager: connManager, 95 localDockerEnv: localDockerEnv, 96 dockerClientFactory: dockerClientFactory, 97 k8sClientFactory: k8sClientFactory, 98 wsList: wsList, 99 clusterHealth: newClusterHealthMonitor(globalCtx, clock, requeuer), 100 base: base, 101 apiServerName: apiServerName, 102 } 103 } 104 105 func (r *Reconciler) Reconcile(ctx context.Context, request reconcile.Request) (reconcile.Result, error) { 106 nn := request.NamespacedName 107 ctx = store.WithManifestLogHandler(ctx, r.store, model.MainTiltfileManifestName, "cluster") 108 109 var obj v1alpha1.Cluster 110 err := r.ctrlClient.Get(ctx, nn, &obj) 111 if err != nil && !apierrors.IsNotFound(err) { 112 return ctrl.Result{}, err 113 } 114 115 if apierrors.IsNotFound(err) || !obj.ObjectMeta.DeletionTimestamp.IsZero() { 116 r.store.Dispatch(clusters.NewClusterDeleteAction(request.Name)) 117 r.cleanup(nn) 118 r.wsList.ForEach(func(ws *server.WebsocketSubscriber) { 119 ws.SendClusterUpdate(ctx, nn, nil) 120 }) 121 return ctrl.Result{}, nil 122 } 123 124 // The apiserver is the source of truth, and will ensure the engine state is up to date. 125 r.store.Dispatch(clusters.NewClusterUpsertAction(&obj)) 126 127 clusterRefreshEnabled := obj.Annotations["features.tilt.dev/cluster-refresh"] == "true" 128 conn, hasConnection := r.connManager.load(nn) 129 // If this is not the first time we've tried to connect to the cluster, 130 // only attempt to refresh the connection if the feature is enabled. Not 131 // all parts of Tilt use a dynamically-obtained client currently, which 132 // can result in erratic behavior if the cluster is not in a usable state 133 // at startup but then becomes usable, for example, as some parts of the 134 // system will still have k8s.explodingClient. 135 if hasConnection && clusterRefreshEnabled { 136 // If the spec changed, delete the connection and recreate it. 137 if !apicmp.DeepEqual(conn.spec, obj.Spec) { 138 r.cleanup(nn) 139 conn = connection{} 140 hasConnection = false 141 } else if conn.initError != "" && r.clock.Now().After(conn.createdAt.Add(clientInitBackoff)) { 142 hasConnection = false 143 } 144 } 145 146 var requeueAfter time.Duration 147 if !hasConnection { 148 // Create the initial connection to the cluster. 149 conn = connection{spec: *obj.Spec.DeepCopy(), createdAt: r.clock.Now()} 150 if obj.Spec.Connection != nil && obj.Spec.Connection.Kubernetes != nil { 151 conn.connType = connectionTypeK8s 152 client, err := r.createKubernetesClient(obj.DeepCopy()) 153 if err != nil { 154 var initError string 155 if !clusterRefreshEnabled { 156 initError = fmt.Sprintf( 157 "Tilt encountered an error connecting to your Kubernetes cluster:"+ 158 "\n\t%v"+ 159 "\nYou will need to restart Tilt after resolving the issue.", 160 err) 161 } else { 162 initError = err.Error() 163 } 164 conn.initError = initError 165 } else { 166 conn.k8sClient = client 167 } 168 } else if obj.Spec.Connection != nil && obj.Spec.Connection.Docker != nil { 169 conn.connType = connectionTypeDocker 170 client, err := r.createDockerClient(obj.Spec.Connection.Docker) 171 if err != nil { 172 conn.initError = err.Error() 173 } else { 174 conn.dockerClient = client 175 } 176 } 177 178 if conn.initError != "" { 179 // requeue the cluster Obj so that we can attempt to re-initialize 180 requeueAfter = clientInitBackoff 181 } else { 182 // start monitoring the connection and requeue the Cluster obj 183 // for reconciliation if its runtime status changes 184 r.clusterHealth.Start(nn, conn) 185 } 186 } 187 188 r.populateClusterMetadata(ctx, nn, &conn) 189 190 r.connManager.store(nn, conn) 191 192 status := conn.toStatus(r.clusterHealth.GetStatus(nn)) 193 err = r.maybeUpdateStatus(ctx, &obj, status) 194 if err != nil { 195 return ctrl.Result{}, err 196 } 197 198 r.wsList.ForEach(func(ws *server.WebsocketSubscriber) { 199 ws.SendClusterUpdate(ctx, nn, &obj) 200 }) 201 202 return ctrl.Result{RequeueAfter: requeueAfter}, nil 203 } 204 205 // Creates a docker connection from the spec. 206 func (r *Reconciler) createDockerClient(obj *v1alpha1.DockerClusterConnection) (docker.Client, error) { 207 // If no Host is specified, use the default Env from environment variables. 208 env := docker.Env(r.localDockerEnv) 209 if obj.Host != "" { 210 d, err := client.NewClientWithOpts(client.WithHost(obj.Host)) 211 env.Client = d 212 if err != nil { 213 env.Error = err 214 } 215 } 216 217 client, err := r.dockerClientFactory.New(r.globalCtx, env) 218 if err != nil { 219 return nil, err 220 } 221 return client, nil 222 } 223 224 // Creates a Kubernetes client from the spec. 225 func (r *Reconciler) createKubernetesClient(cluster *v1alpha1.Cluster) (k8s.Client, error) { 226 k8sKubeContextOverride := k8s.KubeContextOverride(cluster.Spec.Connection.Kubernetes.Context) 227 k8sNamespaceOverride := k8s.NamespaceOverride(cluster.Spec.Connection.Kubernetes.Namespace) 228 client, err := r.k8sClientFactory.New(r.globalCtx, k8sKubeContextOverride, k8sNamespaceOverride) 229 if err != nil { 230 return nil, err 231 } 232 return client, nil 233 } 234 235 // Reads the arch from a kubernetes cluster, or "unknown" if we can't 236 // figure out the architecture. 237 // 238 // Note that it's normal that users may not have access to the kubernetes 239 // arch if there are RBAC rules restricting read access on nodes. 240 // 241 // We only need to read SOME arch that the cluster supports. 242 func (r *Reconciler) readKubernetesArch(ctx context.Context, client k8s.Client) string { 243 nodeMetas, err := client.ListMeta(ctx, schema.GroupVersionKind{Version: "v1", Kind: "Node"}, "") 244 if err != nil || len(nodeMetas) == 0 { 245 return ArchUnknown 246 } 247 248 // https://github.com/kubernetes/enhancements/blob/0e4d5df19d396511fe41ed0860b0ab9b96f46a2d/keps/sig-node/793-node-os-arch-labels/README.md 249 // https://kubernetes.io/docs/reference/labels-annotations-taints/#kubernetes-io-arch 250 arch := nodeMetas[0].GetLabels()["kubernetes.io/arch"] 251 if arch == "" { 252 arch = nodeMetas[0].GetLabels()["beta.kubernetes.io/arch"] 253 } 254 255 if arch == "" { 256 return ArchUnknown 257 } 258 return arch 259 } 260 261 // Reads the arch from a Docker cluster, or "unknown" if we can't 262 // figure out the architecture. 263 func (r *Reconciler) readDockerArch(ctx context.Context, client docker.Client) string { 264 serverVersion, err := client.ServerVersion(ctx) 265 if err != nil { 266 return ArchUnknown 267 } 268 arch := serverVersion.Arch 269 if arch == "" { 270 return ArchUnknown 271 } 272 return arch 273 } 274 275 func (r *Reconciler) maybeUpdateStatus(ctx context.Context, obj *v1alpha1.Cluster, newStatus v1alpha1.ClusterStatus) error { 276 if apicmp.DeepEqual(obj.Status, newStatus) { 277 return nil 278 } 279 280 update := obj.DeepCopy() 281 oldStatus := update.Status 282 update.Status = newStatus 283 err := r.ctrlClient.Status().Update(ctx, update) 284 if err != nil { 285 return fmt.Errorf("updating cluster %s status: %v", obj.Name, err) 286 } 287 288 if newStatus.Error != "" && oldStatus.Error != newStatus.Error { 289 logger.Get(ctx).Errorf("Cluster status error: %v", newStatus.Error) 290 } 291 292 r.reportConnectionEvent(ctx, update) 293 294 return nil 295 } 296 297 func (r *Reconciler) reportConnectionEvent(ctx context.Context, cluster *v1alpha1.Cluster) { 298 tags := make(map[string]string) 299 300 if cluster.Spec.Connection != nil { 301 if cluster.Spec.Connection.Kubernetes != nil { 302 tags["type"] = "kubernetes" 303 } else if cluster.Spec.Connection.Docker != nil { 304 tags["type"] = "docker" 305 } 306 } 307 308 if cluster.Status.Arch != "" { 309 tags["arch"] = cluster.Status.Arch 310 } 311 312 if cluster.Status.Error == "" { 313 tags["status"] = "connected" 314 } else { 315 tags["status"] = "error" 316 } 317 318 analytics.Get(ctx).Incr("api.cluster.connect", tags) 319 } 320 321 func (r *Reconciler) populateClusterMetadata(ctx context.Context, clusterNN types.NamespacedName, conn *connection) { 322 if conn.initError != "" { 323 return 324 } 325 326 switch conn.connType { 327 case connectionTypeK8s: 328 r.populateK8sMetadata(ctx, clusterNN, conn) 329 case connectionTypeDocker: 330 r.populateDockerMetadata(ctx, conn) 331 } 332 } 333 334 func (r *Reconciler) populateK8sMetadata(ctx context.Context, clusterNN types.NamespacedName, conn *connection) { 335 if conn.arch == "" { 336 conn.arch = r.readKubernetesArch(ctx, conn.k8sClient) 337 } 338 339 if conn.registry == nil { 340 reg := conn.k8sClient.LocalRegistry(ctx) 341 if !container.IsEmptyRegistry(reg) { 342 // If we've found a local registry in the cluster at run-time, use that 343 // instead of the default_registry (if any) declared in the Tiltfile 344 logger.Get(ctx).Infof("Auto-detected local registry from environment: %s", reg) 345 346 if conn.spec.DefaultRegistry != nil { 347 // The user has specified a default registry in their Tiltfile, but it will be ignored. 348 logger.Get(ctx).Infof("Default registry specified, but will be ignored in favor of auto-detected registry.") 349 } 350 } else if conn.spec.DefaultRegistry != nil { 351 logger.Get(ctx).Debugf("Using default registry from Tiltfile: %s", conn.spec.DefaultRegistry) 352 } else { 353 logger.Get(ctx).Debugf( 354 "No local registry detected and no default registry set for cluster %q", 355 clusterNN.Name) 356 } 357 358 conn.registry = reg 359 } 360 361 if conn.connStatus == nil { 362 apiConfig := conn.k8sClient.APIConfig() 363 k8sStatus := &v1alpha1.KubernetesClusterConnectionStatus{ 364 Context: apiConfig.CurrentContext, 365 Product: string(k8s.ClusterProductFromAPIConfig(apiConfig)), 366 } 367 context, ok := apiConfig.Contexts[apiConfig.CurrentContext] 368 if ok { 369 k8sStatus.Namespace = context.Namespace 370 k8sStatus.Cluster = context.Cluster 371 } 372 k8sStatus.ConfigPath = r.writeFrozenKubeConfig(ctx, clusterNN, apiConfig) 373 374 conn.connStatus = &v1alpha1.ClusterConnectionStatus{ 375 Kubernetes: k8sStatus, 376 } 377 } 378 379 if conn.serverVersion == "" { 380 versionInfo, err := conn.k8sClient.CheckConnected(ctx) 381 if err == nil { 382 conn.serverVersion = versionInfo.String() 383 } 384 } 385 } 386 387 func (r *Reconciler) writeFrozenKubeConfig(ctx context.Context, nn types.NamespacedName, config *api.Config) string { 388 config = config.DeepCopy() 389 err := api.MinifyConfig(config) 390 if err != nil { 391 logger.Get(ctx).Warnf("Minifying Kubernetes config: %v", err) 392 return "" 393 } 394 395 err = api.FlattenConfig(config) 396 if err != nil { 397 logger.Get(ctx).Warnf("Flattening Kubernetes config: %v", err) 398 return "" 399 } 400 401 obj, err := latest.Scheme.ConvertToVersion(config, latest.ExternalVersion) 402 if err != nil { 403 logger.Get(ctx).Warnf("Converting Kubernetes config: %v", err) 404 return "" 405 } 406 407 printer := printers.YAMLPrinter{} 408 path, err := r.base.RuntimeFile( 409 filepath.Join(string(r.apiServerName), "cluster", fmt.Sprintf("%s.yml", nn.Name))) 410 if err != nil { 411 logger.Get(ctx).Warnf("Writing Kubernetes config: %v", err) 412 return "" 413 } 414 415 f, err := os.OpenFile(path, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0600) 416 if err != nil { 417 logger.Get(ctx).Warnf("Writing Kubernetes config: %v", err) 418 return "" 419 } 420 defer func() { 421 _ = f.Close() 422 }() 423 424 err = printer.PrintObj(obj, f) 425 if err != nil { 426 logger.Get(ctx).Warnf("Writing Kubernetes config: %v", err) 427 return "" 428 } 429 return path 430 } 431 432 func (r *Reconciler) populateDockerMetadata(ctx context.Context, conn *connection) { 433 if conn.arch == "" { 434 conn.arch = r.readDockerArch(ctx, conn.dockerClient) 435 } 436 437 if conn.serverVersion == "" { 438 versionInfo, err := conn.dockerClient.ServerVersion(ctx) 439 if err == nil { 440 conn.serverVersion = versionInfo.Version 441 } 442 } 443 } 444 445 func (r *Reconciler) cleanup(clusterNN types.NamespacedName) { 446 r.clusterHealth.Stop(clusterNN) 447 r.connManager.delete(clusterNN) 448 } 449 450 func (c *connection) toStatus(statusErr string) v1alpha1.ClusterStatus { 451 var connectedAt *metav1.MicroTime 452 if c.initError == "" && !c.createdAt.IsZero() { 453 t := apis.NewMicroTime(c.createdAt) 454 connectedAt = &t 455 } 456 457 clusterError := c.initError 458 if clusterError == "" { 459 clusterError = statusErr 460 } 461 462 return v1alpha1.ClusterStatus{ 463 Error: clusterError, 464 Arch: c.arch, 465 Version: c.serverVersion, 466 ConnectedAt: connectedAt, 467 Registry: c.registry, 468 Connection: c.connStatus, 469 } 470 }