github.com/telepresenceio/telepresence/v2@v2.20.0-pro.6.0.20240517030216-236ea954e789/integration_test/itest/cluster.go

package itest

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net"
	"net/http"
	"os"
	"path/filepath"
	"reflect"
	"runtime"
	"strconv"
	"strings"
	"sync"
	"testing"
	"time"
	"unicode/utf8"

	"github.com/sirupsen/logrus"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
	"gopkg.in/yaml.v3"
	core "k8s.io/api/core/v1"
	rbac "k8s.io/api/rbac/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	k8sruntime "k8s.io/apimachinery/pkg/runtime"
	"k8s.io/client-go/tools/clientcmd"
	"k8s.io/client-go/tools/clientcmd/api"
	sigsYaml "sigs.k8s.io/yaml"

	"github.com/datawire/dlib/dexec"
	"github.com/datawire/dlib/dhttp"
	"github.com/datawire/dlib/dlog"
	"github.com/datawire/dlib/dtime"
	"github.com/datawire/dtest"
	telcharts "github.com/telepresenceio/telepresence/v2/charts"
	"github.com/telepresenceio/telepresence/v2/pkg/client"
	"github.com/telepresenceio/telepresence/v2/pkg/client/socket"
	"github.com/telepresenceio/telepresence/v2/pkg/client/userd/k8s"
	"github.com/telepresenceio/telepresence/v2/pkg/dos"
	"github.com/telepresenceio/telepresence/v2/pkg/filelocation"
	"github.com/telepresenceio/telepresence/v2/pkg/iputil"
	"github.com/telepresenceio/telepresence/v2/pkg/log"
	"github.com/telepresenceio/telepresence/v2/pkg/maps"
	"github.com/telepresenceio/telepresence/v2/pkg/proc"
	"github.com/telepresenceio/telepresence/v2/pkg/shellquote"
	"github.com/telepresenceio/telepresence/v2/pkg/slice"
	"github.com/telepresenceio/telepresence/v2/pkg/version"
)

const (
	TestUser = "telepresence-test-developer"
)

type Cluster interface {
	CapturePodLogs(ctx context.Context, app, container, ns string) string
	CompatVersion() string
	Executable() (string, error)
	GeneralError() error
	GlobalEnv(context.Context) dos.MapEnv
	AgentVersion(context.Context) string
	Initialize(context.Context) context.Context
	InstallTrafficManager(ctx context.Context, values map[string]string) error
	InstallTrafficManagerVersion(ctx context.Context, version string, values map[string]string) error
	IsCI() bool
	IsIPv6() bool
	Registry() string
	SetGeneralError(error)
	Suffix() string
	TelepresenceVersion() string
	UninstallTrafficManager(ctx context.Context, managerNamespace string, args ...string)
	PackageHelmChart(ctx context.Context) (string, error)
	GetValuesForHelm(ctx context.Context, values map[string]string, release bool) []string
	GetSetArgsForHelm(ctx context.Context, values map[string]string, release bool) []string
	GetK8SCluster(ctx context.Context, context, managerNamespace string) (context.Context, *k8s.Cluster, error)
	TelepresenceHelmInstallOK(ctx context.Context, upgrade bool, args ...string) string
	TelepresenceHelmInstall(ctx context.Context, upgrade bool, args ...string) (string, error)
	UserdPProf() uint16
	RootdPProf() uint16
}

// The cluster is created once and then reused by all tests. It ensures that:
//
//   - the executable and the images are built once
//   - a docker repository is available
//   - built images are pushed to the docker repository
//   - a cluster is available
type cluster struct {
	suffix           string
	isCI             bool
	prePushed        bool
	ipv6             bool
	executable       string
	testVersion      string
	compatVersion    string
	registry         string
	kubeConfig       string
	generalError     error
	logCapturingPods sync.Map
	userdPProf       uint16
	rootdPProf       uint16
	self             Cluster
}

//nolint:gochecknoglobals // extension point
var ExtendClusterFunc = func(c Cluster) Cluster {
	return c
}

func WithCluster(ctx context.Context, f func(ctx context.Context)) {
	s := cluster{}
	s.self = &s
	ec := ExtendClusterFunc(&s)
	ctx = withGlobalHarness(ctx, ec)
	ctx = ec.Initialize(ctx)
	defer s.tearDown(ctx)
	t := getT(ctx)
	if !t.Failed() {
		f(s.withBasicConfig(ctx, t))
	}
}
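
// A minimal usage sketch (an assumption of this comment, not part of the
// original source): a test binary would typically enter the harness once,
// from a TestMain-style entry point, and run every suite inside the
// callback. The runner invoked inside the callback is hypothetical; what
// matters is that all tests run within the context returned by Initialize
// and configured by withBasicConfig:
//
//	func TestMain(m *testing.M) {
//		itest.WithCluster(context.Background(), func(ctx context.Context) {
//			runAllSuites(ctx) // hypothetical suite runner
//		})
//	}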

func (s *cluster) SetSelf(self Cluster) {
	s.self = self
}

func (s *cluster) imagesFromEnv(ctx context.Context) context.Context {
	v := s.self.TelepresenceVersion()[1:]
	r := s.self.Registry()
	if img := ImageFromEnv(ctx, "DEV_MANAGER_IMAGE", v, r); img != nil {
		ctx = WithImage(ctx, img)
	}
	if img := ImageFromEnv(ctx, "DEV_CLIENT_IMAGE", v, r); img != nil {
		ctx = WithClientImage(ctx, img)
	}
	if img := ImageFromEnv(ctx, "DEV_AGENT_IMAGE", s.self.AgentVersion(ctx), r); img != nil {
		ctx = WithAgentImage(ctx, img)
	}
	return ctx
}

func (s *cluster) AgentVersion(ctx context.Context) string {
	return s.self.TelepresenceVersion()[1:]
}

func (s *cluster) Initialize(ctx context.Context) context.Context {
	s.suffix, s.isCI = dos.LookupEnv(ctx, "GITHUB_SHA")
	if s.isCI {
		// Use 7 characters of SHA to avoid busting k8s 60 character name limit
		if len(s.suffix) > 7 {
			s.suffix = s.suffix[:7]
		}
	} else {
		s.suffix = strconv.Itoa(os.Getpid())
	}
	s.testVersion, s.prePushed = dos.LookupEnv(ctx, "DEV_TELEPRESENCE_VERSION")
	if s.prePushed {
		dlog.Infof(ctx, "Using pre-pushed binary %s", s.testVersion)
	} else {
		s.testVersion = "v2.14.0-gotest.z" + s.suffix
		dlog.Infof(ctx, "Building temp binary %s", s.testVersion)
	}
	version.Version, version.Structured = version.Init(s.testVersion, "TELEPRESENCE_VERSION")
	s.compatVersion = dos.Getenv(ctx, "DEV_COMPAT_VERSION")

	t := getT(ctx)
	s.registry = dos.Getenv(ctx, "DTEST_REGISTRY")
	require.NoError(t, s.generalError)
	ctx = s.imagesFromEnv(ctx)

	if pp := dos.Getenv(ctx, "DEV_USERD_PROFILING_PORT"); pp != "" {
		port, err := strconv.ParseUint(pp, 10, 16)
		require.NoError(t, err)
		s.userdPProf = uint16(port)
	}
	if pp := dos.Getenv(ctx, "DEV_ROOTD_PROFILING_PORT"); pp != "" {
		port, err := strconv.ParseUint(pp, 10, 16)
		require.NoError(t, err)
		s.rootdPProf = uint16(port)
	}
	if s.prePushed {
		exe := "telepresence"
		if runtime.GOOS == "windows" {
			exe = "telepresence.exe"
		}
		s.executable = filepath.Join(GetModuleRoot(ctx), "build-output", "bin", exe)
	}
	errs := make(chan error, 10)
	wg := &sync.WaitGroup{}
	wg.Add(3)
	go s.ensureExecutable(ctx, errs, wg)
	go s.ensureDockerImages(ctx, errs, wg)
	go s.ensureCluster(ctx, wg)
	wg.Wait()
	close(errs)
	for err := range errs {
		assert.NoError(t, err)
	}

	if ipv6, err := strconv.ParseBool(dos.Getenv(ctx, "DEV_IPV6_CLUSTER")); err == nil {
		s.ipv6 = ipv6
	} else {
		output, err := Output(ctx, "kubectl", "--namespace", "kube-system", "get", "svc", "kube-dns", "-o", "jsonpath={.spec.clusterIP}")
		if err == nil {
			ip := iputil.Parse(strings.TrimSpace(output))
			if len(ip) == 16 {
				dlog.Info(ctx, "Using IPv6 because the kube-dns.kube-system has an IPv6 IP")
				s.ipv6 = true
			}
		}
	}

	s.ensureQuit(ctx)
	_ = Run(ctx, "kubectl", "delete", "ns", "-l", "purpose=tp-cli-testing")
	return ctx
}

func (s *cluster) tearDown(ctx context.Context) {
	s.ensureQuit(ctx)
	if s.kubeConfig != "" {
		ctx = WithWorkingDir(ctx, GetOSSRoot(ctx))
		_ = Run(ctx, "kubectl", "delete", "-f", filepath.Join("testdata", "k8s", "client_rbac.yaml"))
		_ = Run(ctx, "kubectl", "delete", "--wait=false", "ns", "-l", "purpose=tp-cli-testing")
	}
}

func (s *cluster) ensureQuit(ctx context.Context) {
	// Ensure that no telepresence is running when the tests start
	_, _, _ = Telepresence(ctx, "quit", "-s") //nolint:dogsled // don't care about any of the returns

	// Ensure that the daemon-socket is non-existent.
	_ = rmAsRoot(ctx, socket.RootDaemonPath(ctx))
}

func (s *cluster) ensureExecutable(ctx context.Context, errs chan<- error, wg *sync.WaitGroup) {
	defer wg.Done()
	if s.executable != "" {
		return
	}

	ctx = WithModuleRoot(ctx)
	exe := "telepresence"
	env := map[string]string{
		"TELEPRESENCE_VERSION":  s.testVersion,
		"TELEPRESENCE_REGISTRY": s.registry,
	}
	if runtime.GOOS == "windows" {
		env["CGO_ENABLED"] = "0"
		exe += ".exe"
	}
	err := Run(WithEnv(ctx, env), "make", "build")
	if err != nil {
		errs <- err
		return
	}
	s.executable = filepath.Join(GetWorkingDir(ctx), "build-output", "bin", exe)
}

func (s *cluster) ensureDocker(ctx context.Context, wg *sync.WaitGroup) {
	defer wg.Done()
	s.registry = dtest.DockerRegistry(log.WithDiscardingLogger(ctx))
}

func (s *cluster) ensureDockerImages(ctx context.Context, errs chan<- error, wg *sync.WaitGroup) {
	defer wg.Done()
	if s.prePushed || s.isCI {
		return
	}
	makeExe := "make"
	if runtime.GOOS == "windows" {
		makeExe = "winmake.bat"
	}

	// Initialize docker and build the images simultaneously
	wgs := &sync.WaitGroup{}
	if s.registry == "" {
		wgs.Add(1)
		go s.ensureDocker(ctx, wgs)
	}

	runMake := func(target string) {
		out, err := Command(WithEnv(WithModuleRoot(ctx), map[string]string{
			"TELEPRESENCE_VERSION":  s.testVersion,
			"TELEPRESENCE_REGISTRY": s.registry,
		}), makeExe, target).CombinedOutput()
		if err != nil {
			errs <- RunError(err, out)
		}
	}

	wgs.Add(2)
	go func() {
		defer wgs.Done()
		runMake("tel2-image")
	}()
	go func() {
		defer wgs.Done()
		runMake("client-image")
	}()
	wgs.Wait()

	// Images are built and a registry exists. Push the images.
	runMake("push-images")
}

func (s *cluster) ensureCluster(ctx context.Context, wg *sync.WaitGroup) {
	defer wg.Done()
	if s.registry == "" {
		dwg := sync.WaitGroup{}
		dwg.Add(1)
		s.ensureDocker(ctx, &dwg)
		dwg.Wait()
	}
	t := getT(ctx)
	s.kubeConfig = dos.Getenv(ctx, "DTEST_KUBECONFIG")
	if s.kubeConfig == "" {
		s.kubeConfig = dtest.Kubeconfig(log.WithDiscardingLogger(ctx))
	}
	require.NoError(t, os.Chmod(s.kubeConfig, 0o600), "failed to chmod 0600 %q", s.kubeConfig)

	// Delete any lingering traffic-manager resources that aren't bound to specific namespaces.
	_ = Run(ctx, "kubectl", "delete", "mutatingwebhookconfiguration,role,rolebinding", "-l", "app=traffic-manager")
}

// PodCreateTimeout will return a timeout suitable for operations that create pods.
// This is longer when running against clusters that scale up nodes on demand for new pods.
func PodCreateTimeout(c context.Context) time.Duration {
	switch GetProfile(c) {
	case GkeAutopilotProfile:
		return 5 * time.Minute
	case DefaultProfile:
		fallthrough
	default: // this really shouldn't be happening but hey
		return 180 * time.Second
	}
}

func (s *cluster) withBasicConfig(c context.Context, t *testing.T) context.Context {
	config := client.GetDefaultConfigFunc()
	config.LogLevels().UserDaemon = logrus.DebugLevel
	config.LogLevels().RootDaemon = logrus.DebugLevel

	to := config.Timeouts()
	to.PrivateClusterConnect = 60 * time.Second
	to.PrivateEndpointDial = 10 * time.Second
	to.PrivateHelm = PodCreateTimeout(c)
	to.PrivateIntercept = 30 * time.Second
	to.PrivateProxyDial = 30 * time.Second
	to.PrivateRoundtripLatency = 5 * time.Second
	to.PrivateTrafficManagerAPI = 120 * time.Second
	to.PrivateTrafficManagerConnect = 180 * time.Second

	images := config.Images()
	images.PrivateRegistry = s.self.Registry()
	if agentImage := GetAgentImage(c); agentImage != nil {
		images.PrivateAgentImage = agentImage.FQName()
		images.PrivateWebhookRegistry = agentImage.Registry
	}
	if clientImage := GetClientImage(c); clientImage != nil {
		images.PrivateClientImage = clientImage.FQName()
	}

	config.Grpc().MaxReceiveSizeV, _ = resource.ParseQuantity("10Mi")
	config.Intercept().UseFtp = true

	configYaml, err := yaml.Marshal(&config)
	require.NoError(t, err)
	configYamlStr := string(configYaml)

	configDir := t.TempDir()
	c = filelocation.WithAppUserConfigDir(c, configDir)
	c, err = SetConfig(c, configDir, configYamlStr)
	require.NoError(t, err)
	return c
}

func (s *cluster) GlobalEnv(ctx context.Context) dos.MapEnv {
	globalEnv := dos.MapEnv{
		"KUBECONFIG": s.kubeConfig,
	}
	yes := struct{}{}
	includeEnv := map[string]struct{}{
		"SCOUT_DISABLE":             yes,
		"HOME":                      yes,
		"PATH":                      yes,
		"LOGNAME":                   yes,
		"USER":                      yes,
		"TMPDIR":                    yes,
		"MAKELEVEL":                 yes,
		"TELEPRESENCE_MAX_LOGFILES": yes,
	}
	if runtime.GOOS == "windows" {
		includeEnv["APPDATA"] = yes
		includeEnv["AppData"] = yes
		includeEnv["LOCALAPPDATA"] = yes
		includeEnv["LocalAppData"] = yes
		includeEnv["OS"] = yes
		includeEnv["TEMP"] = yes
		includeEnv["TMP"] = yes
		includeEnv["Path"] = yes
		includeEnv["PATHEXT"] = yes
		includeEnv["ProgramFiles"] = yes
		includeEnv["ProgramData"] = yes
		includeEnv["SystemDrive"] = yes
		includeEnv["USERPROFILE"] = yes
		includeEnv["USERNAME"] = yes
		includeEnv["windir"] = yes
	}
	for _, env := range dos.Environ(ctx) {
		if eqIdx := strings.IndexByte(env, '='); eqIdx > 0 {
			key := env[:eqIdx]
			if _, ok := includeEnv[key]; ok {
				globalEnv[key] = env[eqIdx+1:]
			}
		}
	}
	return globalEnv
}

func (s *cluster) Executable() (string, error) {
	return s.executable, nil
}

func (s *cluster) GeneralError() error {
	return s.generalError
}

func (s *cluster) IsCI() bool {
	return s.isCI
}

func (s *cluster) IsIPv6() bool {
	return s.ipv6
}

func (s *cluster) Registry() string {
	return s.registry
}

func (s *cluster) SetGeneralError(err error) {
	s.generalError = err
}

func (s *cluster) Suffix() string {
	return s.suffix
}

func (s *cluster) TelepresenceVersion() string {
	return s.testVersion
}

func (s *cluster) CompatVersion() string {
	return s.compatVersion
}

func (s *cluster) UserdPProf() uint16 {
	return s.userdPProf
}

func (s *cluster) RootdPProf() uint16 {
	return s.rootdPProf
}

func (s *cluster) CapturePodLogs(ctx context.Context, app, container, ns string) string {
	var pods []string
	for i := 0; ; i++ {
		runningPods := RunningPods(ctx, app, ns)
		if len(runningPods) > 0 {
			if container == "" {
				pods = runningPods
			} else {
				for _, pod := range runningPods {
					cns, err := KubectlOut(ctx, ns, "get", "pods", pod, "-o", "jsonpath={.spec.containers[*].name}")
					if err == nil && slice.Contains(strings.Split(cns, " "), container) {
						pods = append(pods, pod)
					}
				}
			}
		}
		if len(pods) > 0 || i == 5 {
			break
		}
		dtime.SleepWithContext(ctx, 2*time.Second)
	}

	if len(pods) == 0 {
		if container == "" {
			dlog.Errorf(ctx, "found no %s pods in namespace %s", app, ns)
		} else {
			dlog.Errorf(ctx, "found no %s pods in namespace %s with a %s container", app, ns, container)
		}
		return ""
	}
	present := struct{}{}

	// Use another logger to avoid errors due to logs arriving after the tests complete.
	ctx = dlog.WithLogger(ctx, dlog.WrapLogrus(logrus.StandardLogger()))
	pod := pods[0]
	key := pod
	if container != "" {
		key += "/" + container
	}
	if _, ok := s.logCapturingPods.LoadOrStore(key, present); ok {
		return ""
	}

	logFileName := filepath.Join(filelocation.AppUserLogDir(ctx), fmt.Sprintf("%s-%s.log", dtime.Now().Format("20060102T150405"), pod))
	logFile, err := os.Create(logFileName)
	if err != nil {
		// Delete the same key that was stored above, and don't touch the nil logFile.
		s.logCapturingPods.Delete(key)
		dlog.Errorf(ctx, "unable to create pod logfile %s: %v", logFileName, err)
		return ""
	}

	args := []string{"--namespace", ns, "logs", "-f", pod}
	if container != "" {
		args = append(args, "-c", container)
	}
	// Let the command die when the pod that it logs dies
	cmd := Command(context.WithoutCancel(ctx), "kubectl", args...)
	cmd.Stdout = logFile
	cmd.Stderr = logFile
	ready := make(chan string, 1)
	go func() {
		defer func() {
			_ = logFile.Close()
			s.logCapturingPods.Delete(key)
		}()
		err := cmd.Start()
		if err == nil {
			if container == "" {
				dlog.Infof(ctx, "Capturing logs for pod %s", pod)
			} else {
				dlog.Infof(ctx, "Capturing logs for pod %s, container %s", pod, container)
			}
			ready <- logFile.Name()
			close(ready)
			err = cmd.Wait()
		}
		if err != nil {
			if container == "" {
				dlog.Errorf(ctx, "log capture for pod %s failed: %v", pod, err)
			} else {
				dlog.Errorf(ctx, "log capture for pod %s, container %s failed: %v", pod, container, err)
			}
			select {
			case <-ready:
			default:
				close(ready)
			}
		}
	}()
	select {
	case <-ctx.Done():
		dlog.Infof(ctx, "log capture for pod %s interrupted prior to start", pod)
		return ""
	case file := <-ready:
		return file
	}
}

func (s *cluster) PackageHelmChart(ctx context.Context) (string, error) {
	filename := filepath.Join(getT(ctx).TempDir(), "telepresence-chart.tgz")
	fh, err := os.OpenFile(filename, os.O_CREATE|os.O_WRONLY, 0o666)
	if err != nil {
		return "", err
	}
	if err := telcharts.WriteChart(telcharts.DirTypeTelepresence, fh, "telepresence", s.self.TelepresenceVersion()[1:]); err != nil {
		_ = fh.Close()
		return "", err
	}
	if err := fh.Close(); err != nil {
		return "", err
	}
	return filename, nil
}

func (s *cluster) GetSetArgsForHelm(ctx context.Context, values map[string]string, release bool) []string {
	settings := s.GetValuesForHelm(ctx, values, release)
	args := make([]string, len(settings)*2)
	n := 0
	for _, s := range settings {
		args[n] = "--set"
		n++
		args[n] = s
		n++
	}
	return args
}
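
// A quick illustration (an assumption of this comment; the chart key below
// is hypothetical): given one extra value, GetSetArgsForHelm interleaves
// "--set" flags with the base settings produced by GetValuesForHelm:
//
//	args := c.GetSetArgsForHelm(ctx, map[string]string{"trafficManager.replicaCount": "2"}, false)
//	// args is now e.g.
//	//   ["--set", "logLevel=debug",
//	//    "--set", "client.routing.allowConflictingSubnets={10.0.0.0/8}",
//	//    ...,
//	//    "--set", "trafficManager.replicaCount=2"]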

func (s *cluster) GetValuesForHelm(ctx context.Context, values map[string]string, release bool) []string {
	nss := GetNamespaces(ctx)
	settings := []string{
		"logLevel=debug",
		"client.routing.allowConflictingSubnets={10.0.0.0/8}",
	}
	if len(nss.ManagedNamespaces) > 0 {
		settings = append(settings,
			fmt.Sprintf("clientRbac.namespaces=%s", nss.HelmString()),
			fmt.Sprintf("managerRbac.namespaces=%s", nss.HelmString()),
		)
	}
	agentImage := GetAgentImage(ctx)
	if agentImage != nil {
		settings = append(settings,
			fmt.Sprintf("agentInjector.agentImage.name=%s", agentImage.Name), // Prevent attempts to retrieve image from SystemA
			fmt.Sprintf("agentInjector.agentImage.tag=%s", agentImage.Tag),
			fmt.Sprintf("agentInjector.agentImage.registry=%s", agentImage.Registry))
	}
	if !release {
		settings = append(settings, fmt.Sprintf("image.registry=%s", s.self.Registry()))
	}

	for k, v := range values {
		settings = append(settings, k+"="+v)
	}
	return settings
}

func (s *cluster) InstallTrafficManager(ctx context.Context, values map[string]string) error {
	chartFilename, err := s.self.PackageHelmChart(ctx)
	if err != nil {
		return err
	}
	return s.installChart(ctx, false, chartFilename, values)
}

// InstallTrafficManagerVersion performs a helm install of a specific version of the traffic-manager using
// the helm registry at https://app.getambassador.io. It is assumed that the image to use for the traffic-manager
// can be pulled from the standard registry at docker.io/datawire, and that the traffic-agent image is
// configured using DEV_AGENT_IMAGE.
//
// The intent is to simulate connection to an older cluster from the current client.
func (s *cluster) InstallTrafficManagerVersion(ctx context.Context, version string, values map[string]string) error {
	chartFilename, err := s.pullHelmChart(ctx, version)
	if err != nil {
		return err
	}
	return s.installChart(ctx, true, chartFilename, values)
}
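
// For example (a sketch; the version string is arbitrary and assumes the
// corresponding chart is published at app.getambassador.io):
//
//	if err := c.InstallTrafficManagerVersion(ctx, "2.18.0", nil); err != nil {
//		t.Fatal(err)
//	}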

func (s *cluster) installChart(ctx context.Context, release bool, chartFilename string, values map[string]string) error {
	settings := s.self.GetSetArgsForHelm(ctx, values, release)

	ctx = WithWorkingDir(ctx, GetOSSRoot(ctx))
	nss := GetNamespaces(ctx)
	args := []string{"install", "-n", nss.Namespace, "--wait"}
	args = append(args, settings...)
	args = append(args, "traffic-manager", chartFilename)

	err := Run(ctx, "helm", args...)
	if err == nil {
		err = RolloutStatusWait(ctx, nss.Namespace, "deploy/traffic-manager")
		if err == nil {
			s.self.CapturePodLogs(ctx, "traffic-manager", "", nss.Namespace)
		}
	}
	return err
}

func (s *cluster) TelepresenceHelmInstallOK(ctx context.Context, upgrade bool, settings ...string) string {
	logFile, err := s.self.TelepresenceHelmInstall(ctx, upgrade, settings...)
	require.NoError(getT(ctx), err)
	return logFile
}

func (s *cluster) TelepresenceHelmInstall(ctx context.Context, upgrade bool, settings ...string) (string, error) {
	nss := GetNamespaces(ctx)
	subjectNames := []string{TestUser}
	subjects := make([]rbac.Subject, len(subjectNames))
	for i, s := range subjectNames {
		subjects[i] = rbac.Subject{
			Kind:      "ServiceAccount",
			Name:      s,
			Namespace: nss.Namespace,
		}
	}

	type xRbac struct {
		Create     bool           `json:"create"`
		Namespaced bool           `json:"namespaced"`
		Subjects   []rbac.Subject `json:"subjects,omitempty"`
		Namespaces []string       `json:"namespaces,omitempty"`
	}
	type xAgent struct {
		Image *Image `json:"image,omitempty"`
	}
	var agent *xAgent
	if agentImage := GetAgentImage(ctx); agentImage != nil {
		agent = &xAgent{Image: agentImage}
	}
	type xClient struct {
		Routing map[string][]string `json:"routing"`
	}
	type xTimeouts struct {
		AgentArrival string `json:"agentArrival,omitempty"`
	}
	nsl := nss.UniqueList()
	vx := struct {
		LogLevel        string    `json:"logLevel"`
		MetritonEnabled bool      `json:"metritonEnabled"`
		Image           *Image    `json:"image,omitempty"`
		Agent           *xAgent   `json:"agent,omitempty"`
		ClientRbac      xRbac     `json:"clientRbac"`
		ManagerRbac     xRbac     `json:"managerRbac"`
		Client          xClient   `json:"client"`
		Timeouts        xTimeouts `json:"timeouts,omitempty"`
	}{
		LogLevel:        "debug",
		MetritonEnabled: false,
		Image:           GetImage(ctx),
		Agent:           agent,
		ClientRbac: xRbac{
			Create:     true,
			Namespaced: len(nss.ManagedNamespaces) > 0,
			Subjects:   subjects,
			Namespaces: nsl,
		},
		ManagerRbac: xRbac{
			Create:     true,
			Namespaced: len(nss.ManagedNamespaces) > 0,
			Namespaces: nsl,
		},
		Client: xClient{
			Routing: map[string][]string{
				"allowConflictingSubnets": {"10.0.0.0/8"},
			},
		},
		Timeouts: xTimeouts{AgentArrival: "60s"},
	}
	ss, err := sigsYaml.Marshal(&vx)
	if err != nil {
		return "", err
	}
	valuesFile := filepath.Join(getT(ctx).TempDir(), "values.yaml")
	if err := os.WriteFile(valuesFile, ss, 0o644); err != nil {
		return "", err
	}

	verb := "install"
	if upgrade {
		verb = "upgrade"
	}
	args := []string{"helm", verb, "-n", nss.Namespace, "-f", valuesFile}
	args = append(args, settings...)

	if _, _, err = Telepresence(WithUser(ctx, "default"), args...); err != nil {
		return "", err
	}
	if err = RolloutStatusWait(ctx, nss.Namespace, "deploy/traffic-manager"); err != nil {
		return "", err
	}
	logFileName := s.self.CapturePodLogs(ctx, "traffic-manager", "", nss.Namespace)
	return logFileName, nil
}

func (s *cluster) pullHelmChart(ctx context.Context, version string) (string, error) {
	if err := Run(ctx, "helm", "repo", "add", "datawire", "https://app.getambassador.io"); err != nil {
		return "", err
	}
	if err := Run(ctx, "helm", "repo", "update"); err != nil {
		return "", err
	}
	dir := getT(ctx).TempDir()
	if err := Run(WithWorkingDir(ctx, dir), "helm", "pull", "datawire/telepresence", "--version", version); err != nil {
		return "", err
	}
	return filepath.Join(dir, fmt.Sprintf("telepresence-%s.tgz", version)), nil
}

func (s *cluster) UninstallTrafficManager(ctx context.Context, managerNamespace string, args ...string) {
	t := getT(ctx)
	ctx = WithUser(ctx, "default")
	TelepresenceOk(ctx, append([]string{"helm", "uninstall", "--manager-namespace", managerNamespace}, args...)...)

	// Helm uninstall does deletions asynchronously, so let's wait until the deployment is gone
	assert.Eventually(t, func() bool { return len(RunningPods(ctx, "traffic-manager", managerNamespace)) == 0 },
		60*time.Second, 4*time.Second, "traffic-manager deployment was not removed")
	TelepresenceQuitOk(ctx)
}

func (s *cluster) GetK8SCluster(ctx context.Context, context, managerNamespace string) (context.Context, *k8s.Cluster, error) {
	_ = os.Setenv("KUBECONFIG", KubeConfig(ctx))
	flags := map[string]string{
		"namespace": managerNamespace,
	}
	if context != "" {
		flags["context"] = context
	}
	cfgAndFlags, err := client.NewKubeconfig(ctx, flags, managerNamespace)
	if err != nil {
		return ctx, nil, err
	}
	kc, err := k8s.NewCluster(ctx, cfgAndFlags, nil)
	if err != nil {
		return ctx, nil, err
	}
	return kc.WithK8sInterface(ctx), kc, nil
}

func KubeConfig(ctx context.Context) string {
	kubeConf, _ := LookupEnv(ctx, "KUBECONFIG")
	return kubeConf
}

const sensitivePrefix = "--$sensitive$--"

// WrapSensitive wraps an argument sent to Command so that it doesn't get logged verbatim. This can
// be used for commands like "telepresence login --apikey NNNN" where the NNNN shouldn't be visible
// in the logs. If NNNN is wrapped using this function, it will appear as "***" in the logs.
func WrapSensitive(s string) string {
	return sensitivePrefix + s
}
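
// For instance (a sketch; the apiKey variable and the availability of the
// login subcommand are assumptions of this example):
//
//	TelepresenceOk(ctx, "login", "--apikey", WrapSensitive(apiKey))
//
// The command runs with the real key, but the debug log shows "***".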

// Command creates and returns a dexec.Cmd initialized with the global environment
// from the cluster harness and any other environment that has been added using the
// WithEnv() function.
func Command(ctx context.Context, executable string, args ...string) *dexec.Cmd {
	getT(ctx).Helper()
	// Ensure that command has a timestamp and is somewhat readable
	dbgArgs := args
	copied := false
	for i, a := range args {
		if strings.HasPrefix(a, sensitivePrefix) {
			if !copied {
				dbgArgs = make([]string, len(args))
				copy(dbgArgs, args)
				args = make([]string, len(args))
				copy(args, dbgArgs)
				copied = true
			}
			dbgArgs[i] = "***"
			args[i] = strings.TrimPrefix(a, sensitivePrefix)
		}
	}
	dlog.Debug(ctx, "executing ", shellquote.ShellString(filepath.Base(executable), dbgArgs))
	cmd := proc.CommandContext(ctx, executable, args...)
	cmd.DisableLogging = true
	cmd.Env = EnvironMap(ctx).Environ()
	cmd.Dir = GetWorkingDir(ctx)
	cmd.Stdin = dos.Stdin(ctx)
	return cmd
}
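
// A typical use (a sketch mirroring what Run and Output below do):
//
//	out, err := Command(ctx, "kubectl", "get", "ns").CombinedOutput()
//	if err != nil {
//		return RunError(err, out)
//	}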

func EnvironMap(ctx context.Context) dos.MapEnv {
	env := GetGlobalHarness(ctx).GlobalEnv(ctx)
	maps.Merge(env, getEnv(ctx))
	return env
}

// TelepresenceOk executes the CLI command in a new process and requires the result to be OK.
func TelepresenceOk(ctx context.Context, args ...string) string {
	t := getT(ctx)
	t.Helper()
	stdout, stderr, err := Telepresence(ctx, args...)
	require.NoError(t, err, "telepresence was unable to run, stdout %s", stdout)
	if err == nil {
		if strings.HasPrefix(stderr, "Warning:") && !strings.ContainsRune(stderr, '\n') {
			// Accept warnings, but log them.
			dlog.Warn(ctx, stderr)
		} else {
			assert.Empty(t, stderr, "Expected stderr to be empty, but got: %s", stderr)
		}
	}
	return stdout
}

// Telepresence executes the CLI command in a new process.
func Telepresence(ctx context.Context, args ...string) (string, string, error) {
	t := getT(ctx)
	t.Helper()
	cmd := TelepresenceCmd(ctx, args...)
	stdout := cmd.Stdout.(*strings.Builder)
	stderr := cmd.Stderr.(*strings.Builder)
	err := cmd.Run()
	errStr := strings.TrimSpace(stderr.String())
	if err != nil {
		err = RunError(err, []byte(errStr))
	}
	return strings.TrimSpace(stdout.String()), errStr, err
}
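
// Callers that care about failure modes can inspect all three returns; a
// sketch:
//
//	stdout, stderr, err := Telepresence(ctx, "status")
//	if err != nil {
//		dlog.Errorf(ctx, "status failed: %v (stderr: %s)", err, stderr)
//	} else {
//		dlog.Info(ctx, stdout)
//	}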

// TelepresenceCmd creates a dexec.Cmd using the Command function. Before the command is created,
// the environment is extended with DEV_TELEPRESENCE_CONFIG_DIR from filelocation.AppUserConfigDir
// and DEV_TELEPRESENCE_LOG_DIR from filelocation.AppUserLogDir.
func TelepresenceCmd(ctx context.Context, args ...string) *dexec.Cmd {
	t := getT(ctx)
	t.Helper()

	var stdout, stderr strings.Builder
	ctx = WithEnv(ctx, map[string]string{
		"DEV_TELEPRESENCE_CONFIG_DIR": filelocation.AppUserConfigDir(ctx),
		"DEV_TELEPRESENCE_LOG_DIR":    filelocation.AppUserLogDir(ctx),
	})

	gh := GetGlobalHarness(ctx)
	if len(args) > 0 && (args[0] == "connect") {
		rest := args[1:]
		args = append(make([]string, 0, len(args)+3), args[0])
		if user := GetUser(ctx); user != "default" {
			args = append(args, "--as", "system:serviceaccount:"+user)
		}
		if gh.UserdPProf() > 0 {
			args = append(args, "--userd-profiling-port", strconv.Itoa(int(gh.UserdPProf())))
		}
		if gh.RootdPProf() > 0 {
			args = append(args, "--rootd-profiling-port", strconv.Itoa(int(gh.RootdPProf())))
		}
		args = append(args, rest...)
	}
	exe, _ := gh.Executable()
	cmd := Command(ctx, exe, args...)
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr
	return cmd
}

// TelepresenceDisconnectOk tells telepresence to quit and asserts that the stdout contains the correct output.
func TelepresenceDisconnectOk(ctx context.Context, args ...string) {
	AssertDisconnectOutput(ctx, TelepresenceOk(ctx, append([]string{"quit"}, args...)...))
}

// AssertDisconnectOutput asserts that the stdout contains the correct output from a telepresence quit command.
func AssertDisconnectOutput(ctx context.Context, stdout string) {
	t := getT(ctx)
	assert.True(t, strings.Contains(stdout, "Disconnected") || strings.Contains(stdout, "Not connected"))
	if t.Failed() {
		t.Logf("Disconnect output was %q", stdout)
	}
}

// TelepresenceQuitOk tells telepresence to quit and asserts that the stdout contains the correct output.
func TelepresenceQuitOk(ctx context.Context) {
	AssertQuitOutput(ctx, TelepresenceOk(ctx, "quit", "-s"))
}

// AssertQuitOutput asserts that the stdout contains the correct output from a telepresence quit command.
func AssertQuitOutput(ctx context.Context, stdout string) {
	t := getT(ctx)
	assert.True(t, strings.Contains(stdout, "Telepresence Daemons quitting...done") ||
		strings.Contains(stdout, "Telepresence Daemons have already quit"))
	if t.Failed() {
		t.Logf("Quit output was %q", stdout)
	}
}

// RunError checks if the given err is a *dexec.ExitError, and if so, extracts
// Stderr and the ExitCode from it.
func RunError(err error, out []byte) error {
	if ee, ok := err.(*dexec.ExitError); ok {
		switch {
		case len(ee.Stderr) > 0:
			err = fmt.Errorf("%s, exit code %d", string(ee.Stderr), ee.ExitCode())
		case utf8.ValidString(string(out)):
			err = fmt.Errorf("%s, exit code %d", string(out), ee.ExitCode())
		default:
			err = fmt.Errorf("exit code %d", ee.ExitCode())
		}
	}
	return err
}

// Run runs the given command and arguments and returns an error if the command failed.
func Run(ctx context.Context, exe string, args ...string) error {
	getT(ctx).Helper()
	out, err := Command(ctx, exe, args...).CombinedOutput()
	if err != nil {
		return RunError(err, out)
	}
	return nil
}

// Output runs the given command and arguments and returns its standard output and an error if the command failed.
func Output(ctx context.Context, exe string, args ...string) (string, error) {
	getT(ctx).Helper()
	cmd := Command(ctx, exe, args...)
	stderr := bytes.Buffer{}
	cmd.Stderr = &stderr
	out, err := cmd.Output()
	if err != nil {
		return string(out), RunError(err, stderr.Bytes())
	}
	return string(out), nil
}

// Kubectl runs kubectl with the default context and the given namespace, or in the default namespace if the given
// namespace is an empty string.
func Kubectl(ctx context.Context, namespace string, args ...string) error {
	getT(ctx).Helper()
	var ks []string
	if namespace != "" {
		ks = append(make([]string, 0, len(args)+2), "--namespace", namespace)
		ks = append(ks, args...)
	} else {
		ks = args
	}
	return Run(ctx, "kubectl", ks...)
}

// KubectlOut runs kubectl with the default context and the given namespace (or the default namespace if it is
// empty) and returns its standard output.
func KubectlOut(ctx context.Context, namespace string, args ...string) (string, error) {
	getT(ctx).Helper()
	var ks []string
	if namespace != "" {
		ks = append(make([]string, 0, len(args)+2), "--namespace", namespace)
		ks = append(ks, args...)
	} else {
		ks = args
	}
	return Output(ctx, "kubectl", ks...)
}
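
// As used elsewhere in this harness, e.g. to read a service's clusterIP:
//
//	ip, err := KubectlOut(ctx, "kube-system", "get", "svc", "kube-dns",
//		"-o", "jsonpath={.spec.clusterIP}")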

func CreateNamespaces(ctx context.Context, namespaces ...string) {
	t := getT(ctx)
	t.Helper()
	wg := sync.WaitGroup{}
	wg.Add(len(namespaces))
	for _, ns := range namespaces {
		go func(ns string) {
			defer wg.Done()
			assert.NoError(t, Kubectl(ctx, "", "create", "namespace", ns), "failed to create namespace %q", ns)
			assert.NoError(t, Kubectl(ctx, "", "label", "namespace", ns, "purpose="+purposeLabel, fmt.Sprintf("app.kubernetes.io/name=%s", ns)))
		}(ns)
	}
	wg.Wait()
}

func DeleteNamespaces(ctx context.Context, namespaces ...string) {
	t := getT(ctx)
	t.Helper()
	wg := sync.WaitGroup{}
	wg.Add(len(namespaces))
	for _, ns := range namespaces {
		if t.Failed() {
			if out, err := KubectlOut(ctx, ns, "get", "events", "--field-selector", "type!=Normal"); err == nil {
				dlog.Debugf(ctx, "Events where type != Normal from namespace %s\n%s", ns, out)
			}
		}
		go func(ns string) {
			defer wg.Done()
			assert.NoError(t, Kubectl(ctx, "", "delete", "namespace", "--wait=false", ns))
		}(ns)
	}
	wg.Wait()
}

// StartLocalHttpEchoServerWithHost is like StartLocalHttpEchoServer but binds to a specific host instead of localhost.
func StartLocalHttpEchoServerWithHost(ctx context.Context, name string, host string) (int, context.CancelFunc) {
	ctx, cancel := context.WithCancel(ctx)
	lc := net.ListenConfig{}
	l, err := lc.Listen(ctx, "tcp", net.JoinHostPort(host, "0"))
	require.NoError(getT(ctx), err, "failed to listen on %s", host)
	go func() {
		sc := &dhttp.ServerConfig{
			Handler: http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
				fmt.Fprintf(w, "%s from intercept at %s", name, r.URL.Path)
			}),
		}
		err := sc.Serve(ctx, l)
		if err != nil {
			dlog.Errorf(ctx, "http server on %s exited with error: %v", host, err)
		} else {
			dlog.Errorf(ctx, "http server on %s exited", host)
		}
	}()
	return l.Addr().(*net.TCPAddr).Port, cancel
}

// StartLocalHttpEchoServer starts a local http server that echoes a line with the given name and
// the current URL path. The port is returned together with a function that cancels the server.
func StartLocalHttpEchoServer(ctx context.Context, name string) (int, context.CancelFunc) {
	return StartLocalHttpEchoServerWithHost(ctx, name, "localhost")
}

// PingInterceptedEchoServer assumes that a server has been created using StartLocalHttpEchoServer and
// that an intercept is active for the given svc and svcPort that will redirect to that local server.
func PingInterceptedEchoServer(ctx context.Context, svc, svcPort string, headers ...string) {
	wl := svc
	if slashIdx := strings.IndexByte(svc, '/'); slashIdx > 0 {
		wl = svc[slashIdx+1:]
		svc = svc[:slashIdx]
	}
	expectedOutput := fmt.Sprintf("%s from intercept at /", wl)
	require.Eventually(getT(ctx), func() bool {
		// condition
		ips, err := net.DefaultResolver.LookupIP(ctx, "ip", svc)
		if err != nil {
			dlog.Info(ctx, err)
			return false
		}
		if len(ips) != 1 {
			dlog.Infof(ctx, "Lookup for %s returned %v", svc, ips)
			return false
		}

		hc := http.Client{Timeout: 2 * time.Second}
		rq, err := http.NewRequestWithContext(ctx, http.MethodGet, fmt.Sprintf("http://%s", net.JoinHostPort(ips[0].String(), svcPort)), nil)
		if err != nil {
			dlog.Info(ctx, err)
			return false
		}
		for _, h := range headers {
			kv := strings.SplitN(h, "=", 2)
			rq.Header[kv[0]] = []string{kv[1]}
		}
		resp, err := hc.Do(rq)
		if err != nil {
			dlog.Info(ctx, err)
			return false
		}
		defer resp.Body.Close()
		body, err := io.ReadAll(resp.Body)
		if err != nil {
			dlog.Info(ctx, err)
			return false
		}
		r := string(body)
		if r != expectedOutput {
			dlog.Infof(ctx, "body: %q != %q", r, expectedOutput)
			return false
		}
		return true
	},
		time.Minute,   // waitFor
		5*time.Second, // polling interval
		`body of %q equals %q`, "http://"+svc, expectedOutput,
	)
}
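
// A combined usage sketch (the service name "echo-easy" and the intercept
// step are assumptions of this example, not part of this file):
//
//	port, cancel := StartLocalHttpEchoServer(ctx, "echo-easy")
//	defer cancel()
//	TelepresenceOk(ctx, "intercept", "echo-easy", "--port", strconv.Itoa(port))
//	PingInterceptedEchoServer(ctx, "echo-easy", "80")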

func WithConfig(c context.Context, modifierFunc func(config client.Config)) context.Context {
	// Quit a running daemon. We're changing the directory where its config resides.
	TelepresenceQuitOk(c)

	t := getT(c)
	cfgVal := reflect.ValueOf(client.GetConfig(c)).Elem()
	cfgCopyVal := reflect.New(cfgVal.Type())
	cfgCopyVal.Elem().Set(cfgVal) // By value copy
	configCopy := cfgCopyVal.Interface()
	modifierFunc(configCopy.(client.Config))
	configYaml, err := yaml.Marshal(&configCopy)
	require.NoError(t, err)
	configYamlStr := string(configYaml)
	configDir := t.TempDir()
	c, err = SetConfig(c, configDir, configYamlStr)
	require.NoError(t, err)
	return c
}
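
// For example (a sketch; the timeout field mirrors the ones set in
// withBasicConfig above):
//
//	ctx = WithConfig(ctx, func(cfg client.Config) {
//		cfg.Timeouts().PrivateIntercept = 10 * time.Second
//	})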

func WithKubeConfigExtension(ctx context.Context, extProducer func(*api.Cluster) map[string]any) context.Context {
	kc := KubeConfig(ctx)
	t := getT(ctx)
	cfg, err := clientcmd.LoadFromFile(kc)
	require.NoError(t, err, "unable to read %s", kc)
	cc := cfg.Contexts[cfg.CurrentContext]
	require.NotNil(t, cc, "unable to get current context from config")
	cluster := cfg.Clusters[cc.Cluster]
	require.NotNil(t, cluster, "unable to get current cluster from config")

	raw, err := json.Marshal(extProducer(cluster))
	require.NoError(t, err, "unable to json.Marshal extension map")
	cluster.Extensions = map[string]k8sruntime.Object{"telepresence.io": &k8sruntime.Unknown{Raw: raw}}

	context := *cc
	context.Cluster = "extra"
	cfg = &api.Config{
		Kind:           "Config",
		APIVersion:     "v1",
		Preferences:    api.Preferences{},
		Clusters:       map[string]*api.Cluster{"extra": cluster},
		Contexts:       map[string]*api.Context{"extra": &context},
		CurrentContext: "extra",
	}
	kubeconfigFileName := filepath.Join(t.TempDir(), "kubeconfig")
	require.NoError(t, clientcmd.WriteToFile(*cfg, kubeconfigFileName), "unable to write modified kubeconfig")
	return WithEnv(ctx, map[string]string{"KUBECONFIG": strings.Join([]string{kc, kubeconfigFileName}, string([]byte{os.PathListSeparator}))})
}
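
// A usage sketch (the "never-proxy" key is one example of a telepresence.io
// extension setting; treat the key and value as assumptions of this comment):
//
//	ctx = WithKubeConfigExtension(ctx, func(*api.Cluster) map[string]any {
//		return map[string]any{"never-proxy": []string{"127.0.0.1/32"}}
//	})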

func WithKubeConfig(ctx context.Context, cfg *api.Config) context.Context {
	t := getT(ctx)
	kubeconfigFileName := filepath.Join(t.TempDir(), "kubeconfig")
	require.NoError(t, clientcmd.WriteToFile(*cfg, kubeconfigFileName), "unable to write modified kubeconfig")
	return WithEnv(ctx, map[string]string{"KUBECONFIG": kubeconfigFileName})
}

// RunningPods returns the names of running pods with app=<service name>. Running here means
// that at least one container is still running; i.e., the pod might well be terminating
// but still be considered running.
func RunningPods(ctx context.Context, svc, ns string) []string {
	out, err := KubectlOut(ctx, ns, "get", "pods", "-o", "json",
		"--field-selector", "status.phase==Running",
		"-l", "app="+svc)
	if err != nil {
		getT(ctx).Log(err.Error())
		return nil
	}
	var pm core.PodList
	if err := json.NewDecoder(strings.NewReader(out)).Decode(&pm); err != nil {
		getT(ctx).Log(err.Error())
		return nil
	}
	pods := make([]string, 0, len(pm.Items))
nextPod:
	for _, pod := range pm.Items {
		for _, cn := range pod.Status.ContainerStatuses {
			if r := cn.State.Running; r != nil && !r.StartedAt.IsZero() {
				// At least one container is still running.
				pods = append(pods, pod.Name)
				continue nextPod
			}
		}
	}
	dlog.Infof(ctx, "Running pods %v", pods)
	return pods
}