github.com/telepresenceio/telepresence/v2@v2.20.0-pro.6.0.20240517030216-236ea954e789/pkg/client/docker/daemon.go

// Package docker contains the functions necessary to start or discover a Telepresence daemon running in a docker container.
package docker

import (
	"bytes"
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"net"
	"net/netip"
	"net/url"
	"os"
	"path/filepath"
	"runtime"
	"strconv"
	"strings"
	"time"

	"github.com/docker/docker/api/types"
	"github.com/docker/docker/api/types/container"
	"github.com/docker/docker/api/types/filters"
	dockerClient "github.com/docker/docker/client"
	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"
	runtime2 "k8s.io/apimachinery/pkg/runtime"
	"k8s.io/client-go/tools/clientcmd/api"

	"github.com/datawire/dlib/dexec"
	"github.com/datawire/dlib/dlog"
	"github.com/datawire/dlib/dtime"
	"github.com/telepresenceio/telepresence/v2/pkg/authenticator/patcher"
	"github.com/telepresenceio/telepresence/v2/pkg/client"
	"github.com/telepresenceio/telepresence/v2/pkg/client/cli/daemon"
	"github.com/telepresenceio/telepresence/v2/pkg/client/docker/kubeauth"
	"github.com/telepresenceio/telepresence/v2/pkg/dnet"
	"github.com/telepresenceio/telepresence/v2/pkg/errcat"
	"github.com/telepresenceio/telepresence/v2/pkg/filelocation"
	"github.com/telepresenceio/telepresence/v2/pkg/proc"
	"github.com/telepresenceio/telepresence/v2/pkg/shellquote"
	"github.com/telepresenceio/telepresence/v2/pkg/version"
)

const (
	telepresenceImage = "telepresence" // TODO: Point to docker.io/datawire and make it configurable
	TpCache           = "/root/.cache/telepresence"
	dockerTpConfig    = "/root/.config/telepresence"
	dockerTpLog       = "/root/.cache/telepresence/logs"
)

var ClientImageName = telepresenceImage //nolint:gochecknoglobals // extension point

// ClientImage returns the fully qualified name of the docker image that corresponds to
// the version of the current executable.
func ClientImage(ctx context.Context) string {
	images := client.GetConfig(ctx).Images()
	img := images.ClientImage(ctx)
	if img == "" {
		registry := images.Registry(ctx)
		img = registry + "/" + ClientImageName + ":" + strings.TrimPrefix(version.Version, "v")
	}
	return img
}
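
// As an illustration: with version.Version equal to "v2.20.0" and a configured
// registry of "docker.io/datawire" (both values are examples; the registry
// comes from the images configuration), ClientImage would return
// "docker.io/datawire/telepresence:2.20.0".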

// DaemonOptions returns the options necessary to pass to a docker run when starting a daemon container.
func DaemonOptions(ctx context.Context, daemonID *daemon.Identifier) ([]string, *net.TCPAddr, error) {
	as, err := dnet.FreePortsTCP(1)
	if err != nil {
		return nil, nil, err
	}
	addr := as[0]
	opts := []string{
		"--name", daemonID.ContainerName(),
		"--network", "telepresence",
		"--cap-add", "NET_ADMIN",
		"--sysctl", "net.ipv6.conf.all.disable_ipv6=0",
		"--device", "/dev/net/tun:/dev/net/tun",
		"-e", fmt.Sprintf("TELEPRESENCE_UID=%d", os.Getuid()),
		"-e", fmt.Sprintf("TELEPRESENCE_GID=%d", os.Getgid()),
		"-p", fmt.Sprintf("%s:%d", addr, addr.Port),
		"-v", fmt.Sprintf("%s:%s:ro", filelocation.AppUserConfigDir(ctx), dockerTpConfig),
		"-v", fmt.Sprintf("%s:%s", filelocation.AppUserCacheDir(ctx), TpCache),
		"-v", fmt.Sprintf("%s:%s", filelocation.AppUserLogDir(ctx), dockerTpLog),
	}
	cr := daemon.GetRequest(ctx)
	for _, ep := range cr.ExposedPorts {
		opts = append(opts, "-p", ep)
	}
	if cr.Hostname != "" {
		opts = append(opts, "--hostname", cr.Hostname)
	}
	if runtime.GOOS == "linux" {
		opts = append(opts, "--add-host", "host.docker.internal:host-gateway")
	}
	env := client.GetEnv(ctx)
	if env.ScoutDisable {
		opts = append(opts, "-e", "SCOUT_DISABLE=1")
	}
	return opts, addr, nil
}
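
// A rough sketch of the docker run flags this produces (the container name,
// UID/GID, host paths, and the chosen port all vary per host and daemon
// identifier; the values below are examples only):
//
//	--name <container-name> --network telepresence --cap-add NET_ADMIN \
//	--sysctl net.ipv6.conf.all.disable_ipv6=0 --device /dev/net/tun:/dev/net/tun \
//	-e TELEPRESENCE_UID=1000 -e TELEPRESENCE_GID=1000 -p 127.0.0.1:58166:58166 \
//	-v ~/.config/telepresence:/root/.config/telepresence:ro \
//	-v ~/.cache/telepresence:/root/.cache/telepresence \
//	-v ~/.cache/telepresence/logs:/root/.cache/telepresence/logs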

// DaemonArgs returns the arguments to pass to a docker run when starting a daemon container.
func DaemonArgs(daemonID *daemon.Identifier, port int) []string {
	return []string{
		"connector-foreground",
		"--name", "docker-" + daemonID.String(),
		"--address", fmt.Sprintf(":%d", port),
		"--embed-network",
	}
}
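
// For example, a daemon identified as "default-cluster" with port 58166 yields
// (identifier and port are examples only):
//
//	connector-foreground --name docker-default-cluster --address :58166 --embed-network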

// ConnectDaemon connects to a containerized daemon at the given address.
func ConnectDaemon(ctx context.Context, address string) (conn *grpc.ClientConn, err error) {
	// Assume that the user daemon is running and connect to it using the given address instead of using a socket.
	for i := 1; ; i++ {
		if ctx.Err() != nil {
			return nil, ctx.Err()
		}
		conn, err = grpc.DialContext(ctx, address,
			grpc.WithTransportCredentials(insecure.NewCredentials()),
			grpc.WithNoProxy(),
			grpc.WithBlock(),
			grpc.FailOnNonTempDialError(true))
		if err != nil {
			if i < 10 {
				// It's likely that we were too quick. Let's take a nap and try again.
				time.Sleep(time.Duration(i*50) * time.Millisecond)
				continue
			}
			return nil, err
		}
		return conn, nil
	}
}
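
// A minimal usage sketch (the address would normally come from the TCPAddr
// returned by DaemonOptions; the literal below is just an example):
//
//	conn, err := ConnectDaemon(ctx, "127.0.0.1:58166")
//	if err != nil {
//		return err
//	}
//	defer conn.Close()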

const (
	kubeAuthPortFile = kubeauth.CommandName + ".port"
	kubeConfigs      = "kube"
)

// readPortFile reads the port file written by the kubeauth service and returns the
// port stored in it. If the file can't be parsed, or if its kubeconfig no longer
// matches the given configFiles, the file is removed and os.ErrNotExist is returned.
func readPortFile(ctx context.Context, portFile string, configFiles []string) (uint16, error) {
	pb, err := os.ReadFile(portFile)
	if err != nil {
		return 0, err
	}
	var p kubeauth.PortFile
	err = json.Unmarshal(pb, &p)
	if err == nil {
		if p.Kubeconfig == strings.Join(configFiles, string(filepath.ListSeparator)) {
			return uint16(p.Port), nil
		}
		dlog.Debug(ctx, "kubeconfig used by kubeauth is no longer valid")
	}
	if err := os.Remove(portFile); err != nil {
		return 0, err
	}
	return 0, os.ErrNotExist
}
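
// The port file is assumed to hold a JSON-encoded kubeauth.PortFile, i.e.
// something along these lines (key casing depends on the struct's JSON tags,
// and the values here are invented):
//
//	{"Port": 58167, "Kubeconfig": "/home/user/.kube/config"}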

// startAuthenticatorService starts the kubeauth service in the background and
// waits for it to announce the port it listens to by creating a port file.
func startAuthenticatorService(ctx context.Context, portFile string, kubeFlags map[string]string, configFiles []string) (uint16, error) {
	// Remove any stale port file.
	_ = os.Remove(portFile)

	args := make([]string, 0, 4+len(kubeFlags)*2)
	args = append(args, client.GetExe(ctx), kubeauth.CommandName, "--portfile", portFile)
	var err error
	if args, err = client.AppendKubeFlags(kubeFlags, args); err != nil {
		return 0, err
	}
	if err := proc.StartInBackground(true, args...); err != nil {
		return 0, err
	}

	// Wait for the new port file to emerge.
	ctx, cancel := context.WithTimeout(ctx, 3*time.Second)
	defer cancel()
	for ctx.Err() == nil {
		dtime.SleepWithContext(ctx, 10*time.Millisecond)
		port, err := readPortFile(ctx, portFile, configFiles)
		if err != nil {
			if !os.IsNotExist(err) {
				return 0, err
			}
			continue
		}
		return port, nil
	}
	return 0, fmt.Errorf(`timeout while waiting for "%s %s" to create a port file`, client.GetExe(ctx), kubeauth.CommandName)
}

// ensureAuthenticatorService returns the port of an already running kubeauth
// service when its port file is still fresh and matches the given configFiles,
// and starts a new service otherwise.
func ensureAuthenticatorService(ctx context.Context, kubeFlags map[string]string, configFiles []string) (uint16, error) {
	portFile := filepath.Join(filelocation.AppUserCacheDir(ctx), kubeAuthPortFile)
	st, err := os.Stat(portFile)
	if err != nil {
		if !os.IsNotExist(err) {
			return 0, err
		}
	} else if st.ModTime().Add(kubeauth.PortFileStaleTime).After(time.Now()) {
		port, err := readPortFile(ctx, portFile, configFiles)
		if err == nil {
			dlog.Debug(ctx, "kubeauth service found alive and valid")
			return port, nil
		}
		if !os.IsNotExist(err) {
			return 0, err
		}
	}
	return startAuthenticatorService(ctx, portFile, kubeFlags, configFiles)
}

// enableK8SAuthenticator creates an external kubeconfig for the containerized
// daemon in which credentials are retrieved through the kubeauth service
// running on the host, and annotates the connect request accordingly.
func enableK8SAuthenticator(ctx context.Context, daemonID *daemon.Identifier) error {
	cr := daemon.GetRequest(ctx)
	if cr.Implicit {
		return nil
	}
	if kkf, ok := cr.ContainerKubeFlagOverrides["kubeconfig"]; ok && strings.HasPrefix(kkf, TpCache) {
		// Been there, done that
		return nil
	}
	loader, err := client.ConfigLoader(ctx, cr.KubeFlags, cr.KubeconfigData)
	if err != nil {
		return err
	}
	config, err := patcher.CreateExternalKubeConfig(ctx, loader, cr.KubeFlags["context"],
		func(configFiles []string) (string, string, error) {
			port, err := ensureAuthenticatorService(ctx, cr.KubeFlags, configFiles)
			if err != nil {
				return "", "", err
			}

			// The telepresence command that will run in order to retrieve the credentials from the authenticator service
			// will run in a container, so the first argument must be a path that finds the telepresence executable and
			// the second must be an address that will find the host's port, not the container's localhost.
			return "telepresence", fmt.Sprintf("host.docker.internal:%d", port), nil
		},
		func(config *api.Config) error {
			return handleLocalK8s(ctx, daemonID, config)
		})
	if err != nil {
		return err
	}
	patcher.AnnotateConnectRequest(&cr.ConnectRequest, TpCache, config.CurrentContext)
	return nil
}
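
// Conceptually, the patched kubeconfig relies on a client-go exec credential
// plugin that calls back to the host. The exact stanza is produced by the
// patcher package, so the sketch below is only indicative:
//
//	users:
//	- name: <user>
//	  user:
//	    exec:
//	      command: telepresence
//	      args: [..., host.docker.internal:<port>, ...]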

// handleLocalK8s checks if the cluster is using a well known provider (currently minikube or kind)
// and if so, ensures that the daemon container is connected to its network.
func handleLocalK8s(ctx context.Context, daemonID *daemon.Identifier, config *api.Config) error {
	cc := config.Contexts[config.CurrentContext]
	cl := config.Clusters[cc.Cluster]
	server, err := url.Parse(cl.Server)
	if err != nil {
		return err
	}
	host, portStr, err := net.SplitHostPort(server.Host)
	if err != nil {
		// Host doesn't have a port, so it's not a local k8s.
		return nil
	}
	addr, err := netip.ParseAddr(host)
	if err != nil {
		if host == "localhost" {
			addr = netip.AddrFrom4([4]byte{127, 0, 0, 1})
			err = nil
		}
	}
	if err != nil {
		return nil
	}
	isMinikube := false
	if ex, ok := cl.Extensions["cluster_info"].(*runtime2.Unknown); ok {
		var data map[string]any
		isMinikube = json.Unmarshal(ex.Raw, &data) == nil && data["provider"] == "minikube.sigs.k8s.io"
	}
	if !(addr.IsLoopback() || isMinikube) {
		return nil
	}

	port, err := strconv.ParseUint(portStr, 10, 16)
	if err != nil {
		return err
	}
	addrPort := netip.AddrPortFrom(addr, uint16(port))

	// Check if a running container from a known local k8s provider
	// has port bindings for the given addrPort.
	cli, err := GetClient(ctx)
	if err != nil {
		return err
	}
	cjs := runningContainers(ctx, cli)

	var hostPort netip.AddrPort
	var network string
	if isMinikube {
		hostPort, network = detectMinikube(ctx, cjs, addrPort, cc.Cluster)
	} else {
		hostPort, network = detectKind(ctx, cjs, addrPort)
	}
	if hostPort.IsValid() {
		dlog.Debugf(ctx, "hostPort %s, network %s", hostPort, network)
		server.Host = hostPort.String()
		cl.Server = server.String()
	}
	if network != "" {
		dcName := daemonID.ContainerName()
		dlog.Debugf(ctx, "Connecting network %s to container %s", network, dcName)
		if err = cli.NetworkConnect(ctx, network, dcName, nil); err != nil {
			if !strings.Contains(err.Error(), "already exists") {
				dlog.Debugf(ctx, "failed to connect network %s to container %s: %v", network, dcName, err)
			}
		}
	}
	return nil
}
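
// Example: for a kind cluster whose kubeconfig points at
// https://127.0.0.1:52718, detectKind may find the control-plane container
// binding 127.0.0.1:52718 to its internal 6443/tcp on the "kind" network.
// The server is then rewritten to https://172.18.0.3:6443 and the daemon
// container is attached to the "kind" network (all addresses and ports here
// are examples).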

// LaunchDaemon ensures that the image returned by ClientImage exists by calling PullImage. It then starts
// the image using the options from DaemonOptions and the arguments from DaemonArgs, and finally connects
// to the daemon using ConnectDaemon. A successful start yields a daemon.Info entry in the cache.
func LaunchDaemon(ctx context.Context, daemonID *daemon.Identifier) (conn *grpc.ClientConn, err error) {
	if proc.RunningInContainer() {
		return nil, errors.New("unable to start a docker container from within a container")
	}
	image := ClientImage(ctx)
	if err = PullImage(ctx, image); err != nil {
		return nil, err
	}

	if err = EnsureNetwork(ctx, "telepresence"); err != nil {
		return nil, err
	}
	opts, addr, err := DaemonOptions(ctx, daemonID)
	if err != nil {
		return nil, errcat.NoDaemonLogs.New(err)
	}
	args := DaemonArgs(daemonID, addr.Port)

	allArgs := make([]string, 0, len(opts)+len(args)+4)
	allArgs = append(allArgs,
		"run",
		"--rm",
		"-d",
	)
	allArgs = append(allArgs, opts...)
	allArgs = append(allArgs, image)
	allArgs = append(allArgs, args...)
	stopAttempted := false
	for i := 1; ; i++ {
		_, err = tryLaunch(ctx, daemonID, addr.Port, allArgs)
		if err != nil {
			if !strings.Contains(err.Error(), "already in use by container") {
				return nil, errcat.NoDaemonLogs.New(err)
			}
			// This may happen if the daemon has died (and hence, we never discovered it), but
			// the container still hasn't died. Let's sleep for a short while and retry.
			if i < 6 {
				dtime.SleepWithContext(ctx, time.Duration(i)*200*time.Millisecond)
				continue
			}
			if stopAttempted {
				return nil, err
			}
			// Container is still alive. Try and stop it.
			stopContainer(ctx, daemonID)
			stopAttempted = true
			i = 1
			continue
		}
		break
	}
	if err = enableK8SAuthenticator(ctx, daemonID); err != nil {
		return nil, err
	}
	if conn, err = ConnectDaemon(ctx, addr.String()); err != nil {
		return nil, err
	}
	return conn, nil
}
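
// In essence, the assembled invocation looks like this (flags abbreviated; see
// DaemonOptions and DaemonArgs for the full set):
//
//	docker run --rm -d <options...> <image> connector-foreground <args...>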

// containerPort returns the port that the container uses internally to expose the given
// addrPort on the host. Zero is returned when the addrPort is not found among
// the container's port bindings.
// The additional bool is true if the host address is IPv6.
func containerPort(addrPort netip.AddrPort, ns *types.NetworkSettings) (port uint16, isIPv6 bool) {
	for portDef, bindings := range ns.Ports {
		if portDef.Proto() != "tcp" {
			continue
		}
		for _, binding := range bindings {
			addr, err := netip.ParseAddr(binding.HostIP)
			if err != nil {
				continue
			}
			pn, err := strconv.ParseUint(binding.HostPort, 10, 16)
			if err != nil {
				continue
			}
			if netip.AddrPortFrom(addr, uint16(pn)) == addrPort {
				return uint16(portDef.Int()), addr.Is6()
			}
		}
	}
	return 0, false
}
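
// Example: given a binding with HostIP "127.0.0.1" and HostPort "52718" for
// the container port definition "6443/tcp", a lookup of 127.0.0.1:52718
// returns (6443, false).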

// runningContainers returns the inspect data for all containers with status=running.
func runningContainers(ctx context.Context, cli dockerClient.APIClient) []types.ContainerJSON {
	cl, err := cli.ContainerList(ctx, container.ListOptions{
		Filters: filters.NewArgs(filters.KeyValuePair{Key: "status", Value: "running"}),
	})
	if err != nil {
		dlog.Errorf(ctx, "failed to list containers: %v", err)
		return nil
	}
	cjs := make([]types.ContainerJSON, 0, len(cl))
	for _, cn := range cl {
		cj, err := cli.ContainerInspect(ctx, cn.ID)
		if err != nil {
			dlog.Errorf(ctx, "container inspect on %v failed: %v", cn.Names, err)
		} else {
			cjs = append(cjs, cj)
		}
	}
	return cjs
}

// localAddr returns the IP address of the container with ID cnID on the docker network with ID nwID.
func localAddr(ctx context.Context, cnID, nwID string, isIPv6 bool) (addr netip.Addr, err error) {
	cli, err := GetClient(ctx)
	if err != nil {
		return addr, err
	}
	nw, err := cli.NetworkInspect(ctx, nwID, types.NetworkInspectOptions{})
	if err != nil {
		return addr, err
	}
	if cn, ok := nw.Containers[cnID]; ok {
		// These aren't IP-addresses at all. They are prefixes!
		var prefix string
		if isIPv6 {
			prefix = cn.IPv6Address
		} else {
			prefix = cn.IPv4Address
		}
		var ap netip.Prefix
		// Don't shadow err here; a failure to parse the prefix must reach the caller.
		if ap, err = netip.ParsePrefix(prefix); err == nil {
			addr = ap.Addr()
		}
	}
	return addr, err
}
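
// Example: a network inspect typically reports the container's IPv4Address as
// a prefix such as "172.18.0.3/16"; localAddr then returns 172.18.0.3.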

// detectMinikube returns the container IP:port for the given hostAddrPort for a container where the
// "name.minikube.sigs.k8s.io" label is equal to the given cluster name.
// Returns the internal IP:port for the given hostAddrPort and the name of a network that makes the
// IP available.
func detectMinikube(ctx context.Context, cns []types.ContainerJSON, hostAddrPort netip.AddrPort, clusterName string) (netip.AddrPort, string) {
	for _, cn := range cns {
		if cfg, ns := cn.Config, cn.NetworkSettings; cfg != nil && ns != nil && cfg.Labels["name.minikube.sigs.k8s.io"] == clusterName {
			if port, isIPv6 := containerPort(hostAddrPort, ns); port != 0 {
				for networkName, network := range ns.Networks {
					addr, err := localAddr(ctx, cn.ID, network.NetworkID, isIPv6)
					if err != nil {
						dlog.Error(ctx, err)
						break
					}
					return netip.AddrPortFrom(addr, port), networkName
				}
			}
		}
	}
	return netip.AddrPort{}, ""
}

// detectKind returns the container hostname:port for the given hostAddrPort for a container where the
// "io.x-k8s.kind.role" label is equal to "control-plane".
// Returns the internal hostname:port for the given hostAddrPort and the name of a network that makes the
// hostname available.
func detectKind(ctx context.Context, cns []types.ContainerJSON, hostAddrPort netip.AddrPort) (netip.AddrPort, string) {
	for _, cn := range cns {
		if cfg, ns := cn.Config, cn.NetworkSettings; cfg != nil && ns != nil && cfg.Labels["io.x-k8s.kind.role"] == "control-plane" {
			if port, isIPv6 := containerPort(hostAddrPort, ns); port != 0 {
				for n, nw := range ns.Networks {
					found := false
					for _, name := range nw.DNSNames {
						if strings.HasSuffix(name, "-control-plane") {
							found = true
							break
						}
					}
					if !found {
						// Aliases were deprecated in favor of DNSNames in Docker 25+,
						// so fall back to checking them for older daemons.
						for _, alias := range nw.Aliases {
							if strings.HasSuffix(alias, "-control-plane") {
								found = true
								break
							}
						}
					}
					if found {
						addr, err := localAddr(ctx, cn.ID, nw.NetworkID, isIPv6)
						if err != nil {
							dlog.Error(ctx, err)
							break
						}
						return netip.AddrPortFrom(addr, port), n
					}
				}
			}
		}
	}
	return netip.AddrPort{}, ""
}

// stopContainer stops the daemon container by executing "docker stop" with the container's name.
func stopContainer(ctx context.Context, daemonID *daemon.Identifier) {
	args := []string{"stop", daemonID.ContainerName()}
	dlog.Debug(ctx, shellquote.ShellString("docker", args))
	if _, err := proc.CaptureErr(dexec.CommandContext(ctx, "docker", args...)); err != nil {
		dlog.Warn(ctx, err)
	}
}

// tryLaunch runs the given docker command and, on success, saves a daemon.Info entry
// for the new container in the cache and returns the container ID.
func tryLaunch(ctx context.Context, daemonID *daemon.Identifier, port int, args []string) (string, error) {
	stdErr := bytes.Buffer{}
	stdOut := bytes.Buffer{}
	dlog.Debug(ctx, shellquote.ShellString("docker", args))
	cmd := proc.CommandContext(ctx, "docker", args...)
	cmd.DisableLogging = true
	cmd.Stderr = &stdErr
	cmd.Stdout = &stdOut
	if err := cmd.Run(); err != nil {
		errStr := strings.TrimSpace(stdErr.String())
		if errStr == "" {
			errStr = err.Error()
		}
		return "", fmt.Errorf("launch of daemon container failed: %s", errStr)
	}
	cid := strings.TrimSpace(stdOut.String())
	cr := daemon.GetRequest(ctx)
	return cid, daemon.SaveInfo(ctx,
		&daemon.Info{
			Options:      map[string]string{"cid": cid},
			InDocker:     true,
			DaemonPort:   port,
			Name:         daemonID.Name,
			KubeContext:  daemonID.KubeContext,
			Namespace:    daemonID.Namespace,
			ExposedPorts: cr.ExposedPorts,
			Hostname:     cr.Hostname,
		}, daemonID.InfoFileName())
}
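
// The saved info is what lets subsequent telepresence commands discover the
// running daemon container. A sketch of its content, assuming the struct is
// stored as JSON keyed by field name (the actual encoding is up to
// daemon.SaveInfo, and the values below are invented):
//
//	{"Options":{"cid":"4f6c0a"},"InDocker":true,"DaemonPort":58166,"Name":"default-cluster"}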