github.com/tilt-dev/tilt@v0.33.15-0.20240515162809-0a22ed45d8a0/internal/controllers/core/cluster/reconciler.go (about)

     1  package cluster
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"os"
     7  	"path/filepath"
     8  	"time"
     9  
    10  	"github.com/docker/docker/client"
    11  	"github.com/jonboulle/clockwork"
    12  	apierrors "k8s.io/apimachinery/pkg/api/errors"
    13  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    14  	"k8s.io/apimachinery/pkg/runtime/schema"
    15  	"k8s.io/apimachinery/pkg/types"
    16  	"k8s.io/cli-runtime/pkg/printers"
    17  	"k8s.io/client-go/tools/clientcmd/api"
    18  	"k8s.io/client-go/tools/clientcmd/api/latest"
    19  	ctrl "sigs.k8s.io/controller-runtime"
    20  	"sigs.k8s.io/controller-runtime/pkg/builder"
    21  	ctrlclient "sigs.k8s.io/controller-runtime/pkg/client"
    22  	"sigs.k8s.io/controller-runtime/pkg/reconcile"
    23  
    24  	"github.com/tilt-dev/tilt/internal/analytics"
    25  	"github.com/tilt-dev/tilt/internal/container"
    26  	"github.com/tilt-dev/tilt/internal/controllers/apicmp"
    27  	"github.com/tilt-dev/tilt/internal/controllers/indexer"
    28  	"github.com/tilt-dev/tilt/internal/docker"
    29  	"github.com/tilt-dev/tilt/internal/hud/server"
    30  	"github.com/tilt-dev/tilt/internal/k8s"
    31  	"github.com/tilt-dev/tilt/internal/store"
    32  	"github.com/tilt-dev/tilt/internal/store/clusters"
    33  	"github.com/tilt-dev/tilt/internal/xdg"
    34  	"github.com/tilt-dev/tilt/pkg/apis"
    35  	"github.com/tilt-dev/tilt/pkg/apis/core/v1alpha1"
    36  	"github.com/tilt-dev/tilt/pkg/logger"
    37  	"github.com/tilt-dev/tilt/pkg/model"
    38  )
    39  
    40  const ArchUnknown string = "unknown"
    41  
    42  const (
    43  	clientInitBackoff        = 30 * time.Second
    44  	clientHealthPollInterval = 15 * time.Second
    45  )
    46  
// Reconciler reconciles v1alpha1.Cluster objects. For each Cluster it
// creates (and, when enabled, refreshes) a Kubernetes or Docker client
// connection, gathers metadata about the cluster, and writes the result to
// the Cluster's status.
type Reconciler struct {
	// globalCtx is handed to the client factories and the health monitor.
	globalCtx     context.Context
	// ctrlClient reads Cluster objects and writes their status.
	ctrlClient    ctrlclient.Client
	// store receives cluster upsert/delete actions and backs the log handler.
	store         store.RStore
	// requeuer is registered as a raw watch source and shared with the
	// health monitor.
	requeuer      *indexer.Requeuer
	// clock timestamps connections and evaluates the init backoff.
	clock         clockwork.Clock
	// connManager holds the per-cluster connection state.
	connManager   *ConnectionManager
	// base resolves the runtime file path for the frozen kubeconfig.
	base          xdg.Base
	// apiServerName is the leading directory component of that path.
	apiServerName model.APIServerName

	// localDockerEnv is the Docker env used when the spec sets no Host.
	localDockerEnv      docker.LocalEnv
	dockerClientFactory DockerClientFactory

	k8sClientFactory KubernetesClientFactory
	// wsList is the set of websocket subscribers notified of cluster updates.
	wsList           *server.WebsocketList

	// clusterHealth monitors successfully initialized connections; its
	// status is folded into the Cluster status.
	clusterHealth *clusterHealthMonitor
}
    65  
    66  func (r *Reconciler) CreateBuilder(mgr ctrl.Manager) (*builder.Builder, error) {
    67  	b := ctrl.NewControllerManagedBy(mgr).
    68  		For(&v1alpha1.Cluster{}).
    69  		WatchesRawSource(r.requeuer)
    70  	return b, nil
    71  }
    72  
    73  func NewReconciler(
    74  	globalCtx context.Context,
    75  	ctrlClient ctrlclient.Client,
    76  	store store.RStore,
    77  	clock clockwork.Clock,
    78  	connManager *ConnectionManager,
    79  	localDockerEnv docker.LocalEnv,
    80  	dockerClientFactory DockerClientFactory,
    81  	k8sClientFactory KubernetesClientFactory,
    82  	wsList *server.WebsocketList,
    83  	base xdg.Base,
    84  	apiServerName model.APIServerName,
    85  ) *Reconciler {
    86  	requeuer := indexer.NewRequeuer()
    87  
    88  	return &Reconciler{
    89  		globalCtx:           globalCtx,
    90  		ctrlClient:          ctrlClient,
    91  		store:               store,
    92  		clock:               clock,
    93  		requeuer:            requeuer,
    94  		connManager:         connManager,
    95  		localDockerEnv:      localDockerEnv,
    96  		dockerClientFactory: dockerClientFactory,
    97  		k8sClientFactory:    k8sClientFactory,
    98  		wsList:              wsList,
    99  		clusterHealth:       newClusterHealthMonitor(globalCtx, clock, requeuer),
   100  		base:                base,
   101  		apiServerName:       apiServerName,
   102  	}
   103  }
   104  
   105  func (r *Reconciler) Reconcile(ctx context.Context, request reconcile.Request) (reconcile.Result, error) {
   106  	nn := request.NamespacedName
   107  	ctx = store.WithManifestLogHandler(ctx, r.store, model.MainTiltfileManifestName, "cluster")
   108  
   109  	var obj v1alpha1.Cluster
   110  	err := r.ctrlClient.Get(ctx, nn, &obj)
   111  	if err != nil && !apierrors.IsNotFound(err) {
   112  		return ctrl.Result{}, err
   113  	}
   114  
   115  	if apierrors.IsNotFound(err) || !obj.ObjectMeta.DeletionTimestamp.IsZero() {
   116  		r.store.Dispatch(clusters.NewClusterDeleteAction(request.Name))
   117  		r.cleanup(nn)
   118  		r.wsList.ForEach(func(ws *server.WebsocketSubscriber) {
   119  			ws.SendClusterUpdate(ctx, nn, nil)
   120  		})
   121  		return ctrl.Result{}, nil
   122  	}
   123  
   124  	// The apiserver is the source of truth, and will ensure the engine state is up to date.
   125  	r.store.Dispatch(clusters.NewClusterUpsertAction(&obj))
   126  
   127  	clusterRefreshEnabled := obj.Annotations["features.tilt.dev/cluster-refresh"] == "true"
   128  	conn, hasConnection := r.connManager.load(nn)
   129  	// If this is not the first time we've tried to connect to the cluster,
   130  	// only attempt to refresh the connection if the feature is enabled. Not
   131  	// all parts of Tilt use a dynamically-obtained client currently, which
   132  	// can result in erratic behavior if the cluster is not in a usable state
   133  	// at startup but then becomes usable, for example, as some parts of the
   134  	// system will still have k8s.explodingClient.
   135  	if hasConnection && clusterRefreshEnabled {
   136  		// If the spec changed, delete the connection and recreate it.
   137  		if !apicmp.DeepEqual(conn.spec, obj.Spec) {
   138  			r.cleanup(nn)
   139  			conn = connection{}
   140  			hasConnection = false
   141  		} else if conn.initError != "" && r.clock.Now().After(conn.createdAt.Add(clientInitBackoff)) {
   142  			hasConnection = false
   143  		}
   144  	}
   145  
   146  	var requeueAfter time.Duration
   147  	if !hasConnection {
   148  		// Create the initial connection to the cluster.
   149  		conn = connection{spec: *obj.Spec.DeepCopy(), createdAt: r.clock.Now()}
   150  		if obj.Spec.Connection != nil && obj.Spec.Connection.Kubernetes != nil {
   151  			conn.connType = connectionTypeK8s
   152  			client, err := r.createKubernetesClient(obj.DeepCopy())
   153  			if err != nil {
   154  				var initError string
   155  				if !clusterRefreshEnabled {
   156  					initError = fmt.Sprintf(
   157  						"Tilt encountered an error connecting to your Kubernetes cluster:"+
   158  							"\n\t%v"+
   159  							"\nYou will need to restart Tilt after resolving the issue.",
   160  						err)
   161  				} else {
   162  					initError = err.Error()
   163  				}
   164  				conn.initError = initError
   165  			} else {
   166  				conn.k8sClient = client
   167  			}
   168  		} else if obj.Spec.Connection != nil && obj.Spec.Connection.Docker != nil {
   169  			conn.connType = connectionTypeDocker
   170  			client, err := r.createDockerClient(obj.Spec.Connection.Docker)
   171  			if err != nil {
   172  				conn.initError = err.Error()
   173  			} else {
   174  				conn.dockerClient = client
   175  			}
   176  		}
   177  
   178  		if conn.initError != "" {
   179  			// requeue the cluster Obj so that we can attempt to re-initialize
   180  			requeueAfter = clientInitBackoff
   181  		} else {
   182  			// start monitoring the connection and requeue the Cluster obj
   183  			// for reconciliation if its runtime status changes
   184  			r.clusterHealth.Start(nn, conn)
   185  		}
   186  	}
   187  
   188  	r.populateClusterMetadata(ctx, nn, &conn)
   189  
   190  	r.connManager.store(nn, conn)
   191  
   192  	status := conn.toStatus(r.clusterHealth.GetStatus(nn))
   193  	err = r.maybeUpdateStatus(ctx, &obj, status)
   194  	if err != nil {
   195  		return ctrl.Result{}, err
   196  	}
   197  
   198  	r.wsList.ForEach(func(ws *server.WebsocketSubscriber) {
   199  		ws.SendClusterUpdate(ctx, nn, &obj)
   200  	})
   201  
   202  	return ctrl.Result{RequeueAfter: requeueAfter}, nil
   203  }
   204  
   205  // Creates a docker connection from the spec.
   206  func (r *Reconciler) createDockerClient(obj *v1alpha1.DockerClusterConnection) (docker.Client, error) {
   207  	// If no Host is specified, use the default Env from environment variables.
   208  	env := docker.Env(r.localDockerEnv)
   209  	if obj.Host != "" {
   210  		d, err := client.NewClientWithOpts(client.WithHost(obj.Host))
   211  		env.Client = d
   212  		if err != nil {
   213  			env.Error = err
   214  		}
   215  	}
   216  
   217  	client, err := r.dockerClientFactory.New(r.globalCtx, env)
   218  	if err != nil {
   219  		return nil, err
   220  	}
   221  	return client, nil
   222  }
   223  
   224  // Creates a Kubernetes client from the spec.
   225  func (r *Reconciler) createKubernetesClient(cluster *v1alpha1.Cluster) (k8s.Client, error) {
   226  	k8sKubeContextOverride := k8s.KubeContextOverride(cluster.Spec.Connection.Kubernetes.Context)
   227  	k8sNamespaceOverride := k8s.NamespaceOverride(cluster.Spec.Connection.Kubernetes.Namespace)
   228  	client, err := r.k8sClientFactory.New(r.globalCtx, k8sKubeContextOverride, k8sNamespaceOverride)
   229  	if err != nil {
   230  		return nil, err
   231  	}
   232  	return client, nil
   233  }
   234  
   235  // Reads the arch from a kubernetes cluster, or "unknown" if we can't
   236  // figure out the architecture.
   237  //
   238  // Note that it's normal that users may not have access to the kubernetes
   239  // arch if there are RBAC rules restricting read access on nodes.
   240  //
   241  // We only need to read SOME arch that the cluster supports.
   242  func (r *Reconciler) readKubernetesArch(ctx context.Context, client k8s.Client) string {
   243  	nodeMetas, err := client.ListMeta(ctx, schema.GroupVersionKind{Version: "v1", Kind: "Node"}, "")
   244  	if err != nil || len(nodeMetas) == 0 {
   245  		return ArchUnknown
   246  	}
   247  
   248  	// https://github.com/kubernetes/enhancements/blob/0e4d5df19d396511fe41ed0860b0ab9b96f46a2d/keps/sig-node/793-node-os-arch-labels/README.md
   249  	// https://kubernetes.io/docs/reference/labels-annotations-taints/#kubernetes-io-arch
   250  	arch := nodeMetas[0].GetLabels()["kubernetes.io/arch"]
   251  	if arch == "" {
   252  		arch = nodeMetas[0].GetLabels()["beta.kubernetes.io/arch"]
   253  	}
   254  
   255  	if arch == "" {
   256  		return ArchUnknown
   257  	}
   258  	return arch
   259  }
   260  
   261  // Reads the arch from a Docker cluster, or "unknown" if we can't
   262  // figure out the architecture.
   263  func (r *Reconciler) readDockerArch(ctx context.Context, client docker.Client) string {
   264  	serverVersion, err := client.ServerVersion(ctx)
   265  	if err != nil {
   266  		return ArchUnknown
   267  	}
   268  	arch := serverVersion.Arch
   269  	if arch == "" {
   270  		return ArchUnknown
   271  	}
   272  	return arch
   273  }
   274  
   275  func (r *Reconciler) maybeUpdateStatus(ctx context.Context, obj *v1alpha1.Cluster, newStatus v1alpha1.ClusterStatus) error {
   276  	if apicmp.DeepEqual(obj.Status, newStatus) {
   277  		return nil
   278  	}
   279  
   280  	update := obj.DeepCopy()
   281  	oldStatus := update.Status
   282  	update.Status = newStatus
   283  	err := r.ctrlClient.Status().Update(ctx, update)
   284  	if err != nil {
   285  		return fmt.Errorf("updating cluster %s status: %v", obj.Name, err)
   286  	}
   287  
   288  	if newStatus.Error != "" && oldStatus.Error != newStatus.Error {
   289  		logger.Get(ctx).Errorf("Cluster status error: %v", newStatus.Error)
   290  	}
   291  
   292  	r.reportConnectionEvent(ctx, update)
   293  
   294  	return nil
   295  }
   296  
   297  func (r *Reconciler) reportConnectionEvent(ctx context.Context, cluster *v1alpha1.Cluster) {
   298  	tags := make(map[string]string)
   299  
   300  	if cluster.Spec.Connection != nil {
   301  		if cluster.Spec.Connection.Kubernetes != nil {
   302  			tags["type"] = "kubernetes"
   303  		} else if cluster.Spec.Connection.Docker != nil {
   304  			tags["type"] = "docker"
   305  		}
   306  	}
   307  
   308  	if cluster.Status.Arch != "" {
   309  		tags["arch"] = cluster.Status.Arch
   310  	}
   311  
   312  	if cluster.Status.Error == "" {
   313  		tags["status"] = "connected"
   314  	} else {
   315  		tags["status"] = "error"
   316  	}
   317  
   318  	analytics.Get(ctx).Incr("api.cluster.connect", tags)
   319  }
   320  
   321  func (r *Reconciler) populateClusterMetadata(ctx context.Context, clusterNN types.NamespacedName, conn *connection) {
   322  	if conn.initError != "" {
   323  		return
   324  	}
   325  
   326  	switch conn.connType {
   327  	case connectionTypeK8s:
   328  		r.populateK8sMetadata(ctx, clusterNN, conn)
   329  	case connectionTypeDocker:
   330  		r.populateDockerMetadata(ctx, conn)
   331  	}
   332  }
   333  
   334  func (r *Reconciler) populateK8sMetadata(ctx context.Context, clusterNN types.NamespacedName, conn *connection) {
   335  	if conn.arch == "" {
   336  		conn.arch = r.readKubernetesArch(ctx, conn.k8sClient)
   337  	}
   338  
   339  	if conn.registry == nil {
   340  		reg := conn.k8sClient.LocalRegistry(ctx)
   341  		if !container.IsEmptyRegistry(reg) {
   342  			// If we've found a local registry in the cluster at run-time, use that
   343  			// instead of the default_registry (if any) declared in the Tiltfile
   344  			logger.Get(ctx).Infof("Auto-detected local registry from environment: %s", reg)
   345  
   346  			if conn.spec.DefaultRegistry != nil {
   347  				// The user has specified a default registry in their Tiltfile, but it will be ignored.
   348  				logger.Get(ctx).Infof("Default registry specified, but will be ignored in favor of auto-detected registry.")
   349  			}
   350  		} else if conn.spec.DefaultRegistry != nil {
   351  			logger.Get(ctx).Debugf("Using default registry from Tiltfile: %s", conn.spec.DefaultRegistry)
   352  		} else {
   353  			logger.Get(ctx).Debugf(
   354  				"No local registry detected and no default registry set for cluster %q",
   355  				clusterNN.Name)
   356  		}
   357  
   358  		conn.registry = reg
   359  	}
   360  
   361  	if conn.connStatus == nil {
   362  		apiConfig := conn.k8sClient.APIConfig()
   363  		k8sStatus := &v1alpha1.KubernetesClusterConnectionStatus{
   364  			Context: apiConfig.CurrentContext,
   365  			Product: string(k8s.ClusterProductFromAPIConfig(apiConfig)),
   366  		}
   367  		context, ok := apiConfig.Contexts[apiConfig.CurrentContext]
   368  		if ok {
   369  			k8sStatus.Namespace = context.Namespace
   370  			k8sStatus.Cluster = context.Cluster
   371  		}
   372  		k8sStatus.ConfigPath = r.writeFrozenKubeConfig(ctx, clusterNN, apiConfig)
   373  
   374  		conn.connStatus = &v1alpha1.ClusterConnectionStatus{
   375  			Kubernetes: k8sStatus,
   376  		}
   377  	}
   378  
   379  	if conn.serverVersion == "" {
   380  		versionInfo, err := conn.k8sClient.CheckConnected(ctx)
   381  		if err == nil {
   382  			conn.serverVersion = versionInfo.String()
   383  		}
   384  	}
   385  }
   386  
   387  func (r *Reconciler) writeFrozenKubeConfig(ctx context.Context, nn types.NamespacedName, config *api.Config) string {
   388  	config = config.DeepCopy()
   389  	err := api.MinifyConfig(config)
   390  	if err != nil {
   391  		logger.Get(ctx).Warnf("Minifying Kubernetes config: %v", err)
   392  		return ""
   393  	}
   394  
   395  	err = api.FlattenConfig(config)
   396  	if err != nil {
   397  		logger.Get(ctx).Warnf("Flattening Kubernetes config: %v", err)
   398  		return ""
   399  	}
   400  
   401  	obj, err := latest.Scheme.ConvertToVersion(config, latest.ExternalVersion)
   402  	if err != nil {
   403  		logger.Get(ctx).Warnf("Converting Kubernetes config: %v", err)
   404  		return ""
   405  	}
   406  
   407  	printer := printers.YAMLPrinter{}
   408  	path, err := r.base.RuntimeFile(
   409  		filepath.Join(string(r.apiServerName), "cluster", fmt.Sprintf("%s.yml", nn.Name)))
   410  	if err != nil {
   411  		logger.Get(ctx).Warnf("Writing Kubernetes config: %v", err)
   412  		return ""
   413  	}
   414  
   415  	f, err := os.OpenFile(path, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0600)
   416  	if err != nil {
   417  		logger.Get(ctx).Warnf("Writing Kubernetes config: %v", err)
   418  		return ""
   419  	}
   420  	defer func() {
   421  		_ = f.Close()
   422  	}()
   423  
   424  	err = printer.PrintObj(obj, f)
   425  	if err != nil {
   426  		logger.Get(ctx).Warnf("Writing Kubernetes config: %v", err)
   427  		return ""
   428  	}
   429  	return path
   430  }
   431  
   432  func (r *Reconciler) populateDockerMetadata(ctx context.Context, conn *connection) {
   433  	if conn.arch == "" {
   434  		conn.arch = r.readDockerArch(ctx, conn.dockerClient)
   435  	}
   436  
   437  	if conn.serverVersion == "" {
   438  		versionInfo, err := conn.dockerClient.ServerVersion(ctx)
   439  		if err == nil {
   440  			conn.serverVersion = versionInfo.Version
   441  		}
   442  	}
   443  }
   444  
// cleanup tears down the state held for a cluster: it stops the health
// monitor for clusterNN and then removes the cluster's connection from the
// connection manager.
func (r *Reconciler) cleanup(clusterNN types.NamespacedName) {
	r.clusterHealth.Stop(clusterNN)
	r.connManager.delete(clusterNN)
}
   449  
   450  func (c *connection) toStatus(statusErr string) v1alpha1.ClusterStatus {
   451  	var connectedAt *metav1.MicroTime
   452  	if c.initError == "" && !c.createdAt.IsZero() {
   453  		t := apis.NewMicroTime(c.createdAt)
   454  		connectedAt = &t
   455  	}
   456  
   457  	clusterError := c.initError
   458  	if clusterError == "" {
   459  		clusterError = statusErr
   460  	}
   461  
   462  	return v1alpha1.ClusterStatus{
   463  		Error:       clusterError,
   464  		Arch:        c.arch,
   465  		Version:     c.serverVersion,
   466  		ConnectedAt: connectedAt,
   467  		Registry:    c.registry,
   468  		Connection:  c.connStatus,
   469  	}
   470  }