github.com/telepresenceio/telepresence/v2@v2.20.0-pro.6.0.20240517030216-236ea954e789/integration_test/itest/cluster.go

package itest

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net"
	"net/http"
	"os"
	"path/filepath"
	"reflect"
	"runtime"
	"strconv"
	"strings"
	"sync"
	"testing"
	"time"
	"unicode/utf8"

	"github.com/sirupsen/logrus"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
	"gopkg.in/yaml.v3"
	core "k8s.io/api/core/v1"
	rbac "k8s.io/api/rbac/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	k8sruntime "k8s.io/apimachinery/pkg/runtime"
	"k8s.io/client-go/tools/clientcmd"
	"k8s.io/client-go/tools/clientcmd/api"
	sigsYaml "sigs.k8s.io/yaml"

	"github.com/datawire/dlib/dexec"
	"github.com/datawire/dlib/dhttp"
	"github.com/datawire/dlib/dlog"
	"github.com/datawire/dlib/dtime"
	"github.com/datawire/dtest"
	telcharts "github.com/telepresenceio/telepresence/v2/charts"
	"github.com/telepresenceio/telepresence/v2/pkg/client"
	"github.com/telepresenceio/telepresence/v2/pkg/client/socket"
	"github.com/telepresenceio/telepresence/v2/pkg/client/userd/k8s"
	"github.com/telepresenceio/telepresence/v2/pkg/dos"
	"github.com/telepresenceio/telepresence/v2/pkg/filelocation"
	"github.com/telepresenceio/telepresence/v2/pkg/iputil"
	"github.com/telepresenceio/telepresence/v2/pkg/log"
	"github.com/telepresenceio/telepresence/v2/pkg/maps"
	"github.com/telepresenceio/telepresence/v2/pkg/proc"
	"github.com/telepresenceio/telepresence/v2/pkg/shellquote"
	"github.com/telepresenceio/telepresence/v2/pkg/slice"
	"github.com/telepresenceio/telepresence/v2/pkg/version"
)

const (
	TestUser = "telepresence-test-developer"
)

type Cluster interface {
	CapturePodLogs(ctx context.Context, app, container, ns string) string
	CompatVersion() string
	Executable() (string, error)
	GeneralError() error
	GlobalEnv(context.Context) dos.MapEnv
	AgentVersion(context.Context) string
	Initialize(context.Context) context.Context
	InstallTrafficManager(ctx context.Context, values map[string]string) error
	InstallTrafficManagerVersion(ctx context.Context, version string, values map[string]string) error
	IsCI() bool
	IsIPv6() bool
	Registry() string
	SetGeneralError(error)
	Suffix() string
	TelepresenceVersion() string
	UninstallTrafficManager(ctx context.Context, managerNamespace string, args ...string)
	PackageHelmChart(ctx context.Context) (string, error)
	GetValuesForHelm(ctx context.Context, values map[string]string, release bool) []string
	GetSetArgsForHelm(ctx context.Context, values map[string]string, release bool) []string
	GetK8SCluster(ctx context.Context, context, managerNamespace string) (context.Context, *k8s.Cluster, error)
	TelepresenceHelmInstallOK(ctx context.Context, upgrade bool, args ...string) string
	TelepresenceHelmInstall(ctx context.Context, upgrade bool, args ...string) (string, error)
	UserdPProf() uint16
	RootdPProf() uint16
}

// The cluster is created once and then reused by all tests. It ensures that:
//
//   - executable and the images are built once
//   - a docker repository is available
//   - built images are pushed to the docker repository
//   - a cluster is available
type cluster struct {
	suffix           string
	isCI             bool
	prePushed        bool
	ipv6             bool
	executable       string
	testVersion      string
	compatVersion    string
	registry         string
	kubeConfig       string
	generalError     error
	logCapturingPods sync.Map
	userdPProf       uint16
	rootdPProf       uint16
	self             Cluster
}

//nolint:gochecknoglobals // extension point
var ExtendClusterFunc = func(c Cluster) Cluster {
	return c
}

func WithCluster(ctx context.Context, f func(ctx context.Context)) {
	s := cluster{}
	s.self = &s
	ec := ExtendClusterFunc(&s)
	ctx = withGlobalHarness(ctx, ec)
	ctx = ec.Initialize(ctx)
	defer s.tearDown(ctx)
	t := getT(ctx)
	if !t.Failed() {
		f(s.withBasicConfig(ctx, t))
	}
}
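
// Illustrative sketch (editor's addition, not part of the original file): a suite
// typically hands WithCluster a context that already carries the *testing.T consumed by
// getT, and runs all of its tests inside the callback so that the harness is initialized
// and torn down exactly once. TestContextWithT is a hypothetical helper.
//
//	func TestIntegration(t *testing.T) {
//		WithCluster(TestContextWithT(context.Background(), t), func(ctx context.Context) {
//			// run sub-tests against the prepared cluster here
//		})
//	}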

func (s *cluster) SetSelf(self Cluster) {
	s.self = self
}

func (s *cluster) imagesFromEnv(ctx context.Context) context.Context {
	v := s.self.TelepresenceVersion()[1:]
	r := s.self.Registry()
	if img := ImageFromEnv(ctx, "DEV_MANAGER_IMAGE", v, r); img != nil {
		ctx = WithImage(ctx, img)
	}
	if img := ImageFromEnv(ctx, "DEV_CLIENT_IMAGE", v, r); img != nil {
		ctx = WithClientImage(ctx, img)
	}
	if img := ImageFromEnv(ctx, "DEV_AGENT_IMAGE", s.self.AgentVersion(ctx), r); img != nil {
		ctx = WithAgentImage(ctx, img)
	}
	return ctx
}

func (s *cluster) AgentVersion(ctx context.Context) string {
	return s.self.TelepresenceVersion()[1:]
}

func (s *cluster) Initialize(ctx context.Context) context.Context {
	s.suffix, s.isCI = dos.LookupEnv(ctx, "GITHUB_SHA")
	if s.isCI {
		// Use 7 characters of SHA to avoid busting k8s 60 character name limit
		if len(s.suffix) > 7 {
			s.suffix = s.suffix[:7]
		}
	} else {
		s.suffix = strconv.Itoa(os.Getpid())
	}
	s.testVersion, s.prePushed = dos.LookupEnv(ctx, "DEV_TELEPRESENCE_VERSION")
	if s.prePushed {
		dlog.Infof(ctx, "Using pre-pushed binary %s", s.testVersion)
	} else {
		s.testVersion = "v2.14.0-gotest.z" + s.suffix
		dlog.Infof(ctx, "Building temp binary %s", s.testVersion)
	}
	version.Version, version.Structured = version.Init(s.testVersion, "TELEPRESENCE_VERSION")
	s.compatVersion = dos.Getenv(ctx, "DEV_COMPAT_VERSION")

	t := getT(ctx)
	s.registry = dos.Getenv(ctx, "DTEST_REGISTRY")
	require.NoError(t, s.generalError)
	ctx = s.imagesFromEnv(ctx)

	if pp := dos.Getenv(ctx, "DEV_USERD_PROFILING_PORT"); pp != "" {
		port, err := strconv.ParseUint(pp, 10, 16)
		require.NoError(t, err)
		s.userdPProf = uint16(port)
	}
	if pp := dos.Getenv(ctx, "DEV_ROOTD_PROFILING_PORT"); pp != "" {
		port, err := strconv.ParseUint(pp, 10, 16)
		require.NoError(t, err)
		s.rootdPProf = uint16(port)
	}
	if s.prePushed {
		exe := "telepresence"
		if runtime.GOOS == "windows" {
			exe = "telepresence.exe"
		}
		s.executable = filepath.Join(GetModuleRoot(ctx), "build-output", "bin", exe)
	}
	errs := make(chan error, 10)
	wg := &sync.WaitGroup{}
	wg.Add(3)
	go s.ensureExecutable(ctx, errs, wg)
	go s.ensureDockerImages(ctx, errs, wg)
	go s.ensureCluster(ctx, wg)
	wg.Wait()
	close(errs)
	for err := range errs {
		assert.NoError(t, err)
	}

	if ipv6, err := strconv.ParseBool(dos.Getenv(ctx, "DEV_IPV6_CLUSTER")); err == nil {
		s.ipv6 = ipv6
	} else {
		output, err := Output(ctx, "kubectl", "--namespace", "kube-system", "get", "svc", "kube-dns", "-o", "jsonpath={.spec.clusterIP}")
		if err == nil {
			ip := iputil.Parse(strings.TrimSpace(output))
			if len(ip) == 16 {
				dlog.Info(ctx, "Using IPv6 because the kube-dns.kube-system has an IPv6 IP")
				s.ipv6 = true
			}
		}
	}

	s.ensureQuit(ctx)
	_ = Run(ctx, "kubectl", "delete", "ns", "-l", "purpose=tp-cli-testing")
	return ctx
}

func (s *cluster) tearDown(ctx context.Context) {
	s.ensureQuit(ctx)
	if s.kubeConfig != "" {
		ctx = WithWorkingDir(ctx, GetOSSRoot(ctx))
		_ = Run(ctx, "kubectl", "delete", "-f", filepath.Join("testdata", "k8s", "client_rbac.yaml"))
		_ = Run(ctx, "kubectl", "delete", "--wait=false", "ns", "-l", "purpose=tp-cli-testing")
	}
}

func (s *cluster) ensureQuit(ctx context.Context) {
	// Ensure that no telepresence is running when the tests start
	_, _, _ = Telepresence(ctx, "quit", "-s") //nolint:dogsled // don't care about any of the returns

	// Ensure that the daemon-socket is non-existent.
	_ = rmAsRoot(ctx, socket.RootDaemonPath(ctx))
}

func (s *cluster) ensureExecutable(ctx context.Context, errs chan<- error, wg *sync.WaitGroup) {
	defer wg.Done()
	if s.executable != "" {
		return
	}

	ctx = WithModuleRoot(ctx)
	exe := "telepresence"
	env := map[string]string{
		"TELEPRESENCE_VERSION":  s.testVersion,
		"TELEPRESENCE_REGISTRY": s.registry,
	}
	if runtime.GOOS == "windows" {
		env["CGO_ENABLED"] = "0"
		exe += ".exe"
	}
	err := Run(WithEnv(ctx, env), "make", "build")
	if err != nil {
		errs <- err
		return
	}
	s.executable = filepath.Join(GetWorkingDir(ctx), "build-output", "bin", exe)
}

func (s *cluster) ensureDocker(ctx context.Context, wg *sync.WaitGroup) {
	defer wg.Done()
	s.registry = dtest.DockerRegistry(log.WithDiscardingLogger(ctx))
}

func (s *cluster) ensureDockerImages(ctx context.Context, errs chan<- error, wg *sync.WaitGroup) {
	defer wg.Done()
	if s.prePushed || s.isCI {
		return
	}
	makeExe := "make"
	if runtime.GOOS == "windows" {
		makeExe = "winmake.bat"
	}

	// Initialize docker and build image simultaneously
	wgs := &sync.WaitGroup{}
	if s.registry == "" {
		wgs.Add(1)
		go s.ensureDocker(ctx, wgs)
	}

	runMake := func(target string) {
		out, err := Command(WithEnv(WithModuleRoot(ctx), map[string]string{
			"TELEPRESENCE_VERSION":  s.testVersion,
			"TELEPRESENCE_REGISTRY": s.registry,
		}), makeExe, target).CombinedOutput()
		if err != nil {
			errs <- RunError(err, out)
		}
	}

	wgs.Add(2)
	go func() {
		defer wgs.Done()
		runMake("tel2-image")
	}()
	go func() {
		defer wgs.Done()
		runMake("client-image")
	}()
	wgs.Wait()

	// Images are built and a registry exists. Push the images.
	runMake("push-images")
}

func (s *cluster) ensureCluster(ctx context.Context, wg *sync.WaitGroup) {
	defer wg.Done()
	if s.registry == "" {
		dwg := sync.WaitGroup{}
		dwg.Add(1)
		s.ensureDocker(ctx, &dwg)
		dwg.Wait()
	}
	t := getT(ctx)
	s.kubeConfig = dos.Getenv(ctx, "DTEST_KUBECONFIG")
	if s.kubeConfig == "" {
		s.kubeConfig = dtest.Kubeconfig(log.WithDiscardingLogger(ctx))
	}
	require.NoError(t, os.Chmod(s.kubeConfig, 0o600), "failed to chmod 0600 %q", s.kubeConfig)

	// Delete any lingering traffic-manager resources that aren't bound to specific namespaces.
	_ = Run(ctx, "kubectl", "delete", "mutatingwebhookconfiguration,role,rolebinding", "-l", "app=traffic-manager")
}

// PodCreateTimeout will return a timeout suitable for operations that create pods.
// This is longer when running against clusters that scale up nodes on demand for new pods.
func PodCreateTimeout(c context.Context) time.Duration {
	switch GetProfile(c) {
	case GkeAutopilotProfile:
		return 5 * time.Minute
	case DefaultProfile:
		fallthrough
	default: // this really shouldn't be happening but hey
		return 180 * time.Second
	}
}

func (s *cluster) withBasicConfig(c context.Context, t *testing.T) context.Context {
	config := client.GetDefaultConfigFunc()
	config.LogLevels().UserDaemon = logrus.DebugLevel
	config.LogLevels().RootDaemon = logrus.DebugLevel

	to := config.Timeouts()
	to.PrivateClusterConnect = 60 * time.Second
	to.PrivateEndpointDial = 10 * time.Second
	to.PrivateHelm = PodCreateTimeout(c)
	to.PrivateIntercept = 30 * time.Second
	to.PrivateProxyDial = 30 * time.Second
	to.PrivateRoundtripLatency = 5 * time.Second
	to.PrivateTrafficManagerAPI = 120 * time.Second
	to.PrivateTrafficManagerConnect = 180 * time.Second

	images := config.Images()
	images.PrivateRegistry = s.self.Registry()
	if agentImage := GetAgentImage(c); agentImage != nil {
		images.PrivateAgentImage = agentImage.FQName()
		images.PrivateWebhookRegistry = agentImage.Registry
	}
	if clientImage := GetClientImage(c); clientImage != nil {
		images.PrivateClientImage = clientImage.FQName()
	}

	config.Grpc().MaxReceiveSizeV, _ = resource.ParseQuantity("10Mi")
	config.Intercept().UseFtp = true

	configYaml, err := yaml.Marshal(&config)
	require.NoError(t, err)
	configYamlStr := string(configYaml)

	configDir := t.TempDir()
	c = filelocation.WithAppUserConfigDir(c, configDir)
	c, err = SetConfig(c, configDir, configYamlStr)
	require.NoError(t, err)
	return c
}

func (s *cluster) GlobalEnv(ctx context.Context) dos.MapEnv {
	globalEnv := dos.MapEnv{
		"KUBECONFIG": s.kubeConfig,
	}
	yes := struct{}{}
	includeEnv := map[string]struct{}{
		"SCOUT_DISABLE":             yes,
		"HOME":                      yes,
		"PATH":                      yes,
		"LOGNAME":                   yes,
		"USER":                      yes,
		"TMPDIR":                    yes,
		"MAKELEVEL":                 yes,
		"TELEPRESENCE_MAX_LOGFILES": yes,
	}
	if runtime.GOOS == "windows" {
		includeEnv["APPDATA"] = yes
		includeEnv["AppData"] = yes
		includeEnv["LOCALAPPDATA"] = yes
		includeEnv["LocalAppData"] = yes
		includeEnv["OS"] = yes
		includeEnv["TEMP"] = yes
		includeEnv["TMP"] = yes
		includeEnv["Path"] = yes
		includeEnv["PATHEXT"] = yes
		includeEnv["ProgramFiles"] = yes
		includeEnv["ProgramData"] = yes
		includeEnv["SystemDrive"] = yes
		includeEnv["USERPROFILE"] = yes
		includeEnv["USERNAME"] = yes
		includeEnv["windir"] = yes
	}
	for _, env := range dos.Environ(ctx) {
		if eqIdx := strings.IndexByte(env, '='); eqIdx > 0 {
			key := env[:eqIdx]
			if _, ok := includeEnv[key]; ok {
				globalEnv[key] = env[eqIdx+1:]
			}
		}
	}
	return globalEnv
}

func (s *cluster) Executable() (string, error) {
	return s.executable, nil
}

func (s *cluster) GeneralError() error {
	return s.generalError
}

func (s *cluster) IsCI() bool {
	return s.isCI
}

func (s *cluster) IsIPv6() bool {
	return s.ipv6
}

func (s *cluster) Registry() string {
	return s.registry
}

func (s *cluster) SetGeneralError(err error) {
	s.generalError = err
}

func (s *cluster) Suffix() string {
	return s.suffix
}

func (s *cluster) TelepresenceVersion() string {
	return s.testVersion
}

func (s *cluster) CompatVersion() string {
	return s.compatVersion
}

func (s *cluster) UserdPProf() uint16 {
	return s.userdPProf
}

func (s *cluster) RootdPProf() uint16 {
	return s.rootdPProf
}

func (s *cluster) CapturePodLogs(ctx context.Context, app, container, ns string) string {
	var pods []string
	for i := 0; ; i++ {
		runningPods := RunningPods(ctx, app, ns)
		if len(runningPods) > 0 {
			if container == "" {
				pods = runningPods
			} else {
				for _, pod := range runningPods {
					cns, err := KubectlOut(ctx, ns, "get", "pods", pod, "-o", "jsonpath={.spec.containers[*].name}")
					if err == nil && slice.Contains(strings.Split(cns, " "), container) {
						pods = append(pods, pod)
					}
				}
			}
		}
		if len(pods) > 0 || i == 5 {
			break
		}
		dtime.SleepWithContext(ctx, 2*time.Second)
	}

	if len(pods) == 0 {
		if container == "" {
			dlog.Errorf(ctx, "found no %s pods in namespace %s", app, ns)
		} else {
			dlog.Errorf(ctx, "found no %s pods in namespace %s with a %s container", app, ns, container)
		}
		return ""
	}
	present := struct{}{}

	// Use another logger to avoid errors due to logs arriving after the tests complete.
	ctx = dlog.WithLogger(ctx, dlog.WrapLogrus(logrus.StandardLogger()))
	pod := pods[0]
	key := pod
	if container != "" {
		key += "/" + container
	}
	if _, ok := s.logCapturingPods.LoadOrStore(key, present); ok {
		return ""
	}

	logFile, err := os.Create(
		filepath.Join(filelocation.AppUserLogDir(ctx), fmt.Sprintf("%s-%s.log", dtime.Now().Format("20060102T150405"), pod)))
	if err != nil {
		s.logCapturingPods.Delete(pod)
		dlog.Errorf(ctx, "unable to create pod logfile %s: %v", logFile.Name(), err)
		return ""
	}

	args := []string{"--namespace", ns, "logs", "-f", pod}
	if container != "" {
		args = append(args, "-c", container)
	}
	// Let the command die when the pod that it logs dies
	cmd := Command(context.WithoutCancel(ctx), "kubectl", args...)
	cmd.Stdout = logFile
	cmd.Stderr = logFile
	ready := make(chan string, 1)
	go func() {
		defer func() {
			_ = logFile.Close()
			s.logCapturingPods.Delete(pod)
		}()
		err := cmd.Start()
		if err == nil {
			if container == "" {
				dlog.Infof(ctx, "Capturing logs for pod %s", pod)
			} else {
				dlog.Infof(ctx, "Capturing logs for pod %s, container %s", pod, container)
			}
			ready <- logFile.Name()
			close(ready)
			err = cmd.Wait()
		}
		if err != nil {
			if container == "" {
				dlog.Errorf(ctx, "log capture for pod %s failed: %v", pod, err)
			} else {
				dlog.Errorf(ctx, "log capture for pod %s, container %s failed: %v", pod, container, err)
			}
			select {
			case <-ready:
			default:
				close(ready)
			}
		}
	}()
	select {
	case <-ctx.Done():
		dlog.Infof(ctx, "log capture for pod %s interrupted prior to start", pod)
		return ""
	case file := <-ready:
		return file
	}
}

func (s *cluster) PackageHelmChart(ctx context.Context) (string, error) {
	filename := filepath.Join(getT(ctx).TempDir(), "telepresence-chart.tgz")
	fh, err := os.OpenFile(filename, os.O_CREATE|os.O_WRONLY, 0o666)
	if err != nil {
		return "", err
	}
	if err := telcharts.WriteChart(telcharts.DirTypeTelepresence, fh, "telepresence", s.self.TelepresenceVersion()[1:]); err != nil {
		_ = fh.Close()
		return "", err
	}
	if err := fh.Close(); err != nil {
		return "", err
	}
	return filename, nil
}

func (s *cluster) GetSetArgsForHelm(ctx context.Context, values map[string]string, release bool) []string {
	settings := s.GetValuesForHelm(ctx, values, release)
	args := make([]string, len(settings)*2)
	n := 0
	for _, s := range settings {
		args[n] = "--set"
		n++
		args[n] = s
		n++
	}
	return args
}

func (s *cluster) GetValuesForHelm(ctx context.Context, values map[string]string, release bool) []string {
	nss := GetNamespaces(ctx)
	settings := []string{
		"logLevel=debug",
		"client.routing.allowConflictingSubnets={10.0.0.0/8}",
	}
	if len(nss.ManagedNamespaces) > 0 {
		settings = append(settings,
			fmt.Sprintf("clientRbac.namespaces=%s", nss.HelmString()),
			fmt.Sprintf("managerRbac.namespaces=%s", nss.HelmString()),
		)
	}
	agentImage := GetAgentImage(ctx)
	if agentImage != nil {
		settings = append(settings,
			fmt.Sprintf("agentInjector.agentImage.name=%s", agentImage.Name), // Prevent attempts to retrieve image from SystemA
			fmt.Sprintf("agentInjector.agentImage.tag=%s", agentImage.Tag),
			fmt.Sprintf("agentInjector.agentImage.registry=%s", agentImage.Registry))
	}
	if !release {
		settings = append(settings, fmt.Sprintf("image.registry=%s", s.self.Registry()))
	}

	for k, v := range values {
		settings = append(settings, k+"="+v)
	}
	return settings
}

func (s *cluster) InstallTrafficManager(ctx context.Context, values map[string]string) error {
	chartFilename, err := s.self.PackageHelmChart(ctx)
	if err != nil {
		return err
	}
	return s.installChart(ctx, false, chartFilename, values)
}
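
// Sketch of typical use (editor's addition, not part of the original file): install the
// chart packaged from the current working tree, overriding a single chart value. The
// value key shown here is hypothetical.
//
//	err := GetGlobalHarness(ctx).InstallTrafficManager(ctx, map[string]string{
//		"trafficManager.apiPort": "8081",
//	})
//	require.NoError(getT(ctx), err)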

// InstallTrafficManagerVersion performs a helm install of a specific version of the traffic-manager using
// the helm registry at https://app.getambassador.io. It is assumed that the image to use for the traffic-manager
// can be pulled from the standard registry at docker.io/datawire, and that the traffic-agent image is
// configured using DEV_AGENT_IMAGE.
//
// The intent is to simulate connection to an older cluster from the current client.
func (s *cluster) InstallTrafficManagerVersion(ctx context.Context, version string, values map[string]string) error {
	chartFilename, err := s.pullHelmChart(ctx, version)
	if err != nil {
		return err
	}
	return s.installChart(ctx, true, chartFilename, values)
}

func (s *cluster) installChart(ctx context.Context, release bool, chartFilename string, values map[string]string) error {
	settings := s.self.GetSetArgsForHelm(ctx, values, release)

	ctx = WithWorkingDir(ctx, GetOSSRoot(ctx))
	nss := GetNamespaces(ctx)
	args := []string{"install", "-n", nss.Namespace, "--wait"}
	args = append(args, settings...)
	args = append(args, "traffic-manager", chartFilename)

	err := Run(ctx, "helm", args...)
	if err == nil {
		err = RolloutStatusWait(ctx, nss.Namespace, "deploy/traffic-manager")
		if err == nil {
			s.self.CapturePodLogs(ctx, "traffic-manager", "", nss.Namespace)
		}
	}
	return err
}

func (s *cluster) TelepresenceHelmInstallOK(ctx context.Context, upgrade bool, settings ...string) string {
	logFile, err := s.self.TelepresenceHelmInstall(ctx, upgrade, settings...)
	require.NoError(getT(ctx), err)
	return logFile
}

func (s *cluster) TelepresenceHelmInstall(ctx context.Context, upgrade bool, settings ...string) (string, error) {
	nss := GetNamespaces(ctx)
	subjectNames := []string{TestUser}
	subjects := make([]rbac.Subject, len(subjectNames))
	for i, s := range subjectNames {
		subjects[i] = rbac.Subject{
			Kind:      "ServiceAccount",
			Name:      s,
			Namespace: nss.Namespace,
		}
	}

	type xRbac struct {
		Create     bool           `json:"create"`
		Namespaced bool           `json:"namespaced"`
		Subjects   []rbac.Subject `json:"subjects,omitempty"`
		Namespaces []string       `json:"namespaces,omitempty"`
	}
	type xAgent struct {
		Image *Image `json:"image,omitempty"`
	}
	var agent *xAgent
	if agentImage := GetAgentImage(ctx); agentImage != nil {
		agent = &xAgent{Image: agentImage}
	}
	type xClient struct {
		Routing map[string][]string `json:"routing"`
	}
	type xTimeouts struct {
		AgentArrival string `json:"agentArrival,omitempty"`
	}
	nsl := nss.UniqueList()
	vx := struct {
		LogLevel        string    `json:"logLevel"`
		MetritonEnabled bool      `json:"metritonEnabled"`
		Image           *Image    `json:"image,omitempty"`
		Agent           *xAgent   `json:"agent,omitempty"`
		ClientRbac      xRbac     `json:"clientRbac"`
		ManagerRbac     xRbac     `json:"managerRbac"`
		Client          xClient   `json:"client"`
		Timeouts        xTimeouts `json:"timeouts,omitempty"`
	}{
		LogLevel:        "debug",
		MetritonEnabled: false,
		Image:           GetImage(ctx),
		Agent:           agent,
		ClientRbac: xRbac{
			Create:     true,
			Namespaced: len(nss.ManagedNamespaces) > 0,
			Subjects:   subjects,
			Namespaces: nsl,
		},
		ManagerRbac: xRbac{
			Create:     true,
			Namespaced: len(nss.ManagedNamespaces) > 0,
			Namespaces: nsl,
		},
		Client: xClient{
			Routing: map[string][]string{
				"allowConflictingSubnets": {"10.0.0.0/8"},
			},
		},
		Timeouts: xTimeouts{AgentArrival: "60s"},
	}
	ss, err := sigsYaml.Marshal(&vx)
	if err != nil {
		return "", err
	}
	valuesFile := filepath.Join(getT(ctx).TempDir(), "values.yaml")
	if err := os.WriteFile(valuesFile, ss, 0o644); err != nil {
		return "", err
	}

	verb := "install"
	if upgrade {
		verb = "upgrade"
	}
	args := []string{"helm", verb, "-n", nss.Namespace, "-f", valuesFile}
	args = append(args, settings...)

	if _, _, err = Telepresence(WithUser(ctx, "default"), args...); err != nil {
		return "", err
	}
	if err = RolloutStatusWait(ctx, nss.Namespace, "deploy/traffic-manager"); err != nil {
		return "", err
	}
	logFileName := s.self.CapturePodLogs(ctx, "traffic-manager", "", nss.Namespace)
	return logFileName, nil
}

func (s *cluster) pullHelmChart(ctx context.Context, version string) (string, error) {
	if err := Run(ctx, "helm", "repo", "add", "datawire", "https://app.getambassador.io"); err != nil {
		return "", err
	}
	if err := Run(ctx, "helm", "repo", "update"); err != nil {
		return "", err
	}
	dir := getT(ctx).TempDir()
	if err := Run(WithWorkingDir(ctx, dir), "helm", "pull", "datawire/telepresence", "--version", version); err != nil {
		return "", err
	}
	return filepath.Join(dir, fmt.Sprintf("telepresence-%s.tgz", version)), nil
}

func (s *cluster) UninstallTrafficManager(ctx context.Context, managerNamespace string, args ...string) {
	t := getT(ctx)
	ctx = WithUser(ctx, "default")
	TelepresenceOk(ctx, append([]string{"helm", "uninstall", "--manager-namespace", managerNamespace}, args...)...)

	// Helm uninstall does deletions asynchronously, so let's wait until the deployment is gone
	assert.Eventually(t, func() bool { return len(RunningPods(ctx, "traffic-manager", managerNamespace)) == 0 },
		60*time.Second, 4*time.Second, "traffic-manager deployment was not removed")
	TelepresenceQuitOk(ctx)
}

func (s *cluster) GetK8SCluster(ctx context.Context, context, managerNamespace string) (context.Context, *k8s.Cluster, error) {
	_ = os.Setenv("KUBECONFIG", KubeConfig(ctx))
	flags := map[string]string{
		"namespace": managerNamespace,
	}
	if context != "" {
		flags["context"] = context
	}
	cfgAndFlags, err := client.NewKubeconfig(ctx, flags, managerNamespace)
	if err != nil {
		return ctx, nil, err
	}
	kc, err := k8s.NewCluster(ctx, cfgAndFlags, nil)
	if err != nil {
		return ctx, nil, err
	}
	return kc.WithK8sInterface(ctx), kc, nil
}

func KubeConfig(ctx context.Context) string {
	kubeConf, _ := LookupEnv(ctx, "KUBECONFIG")
	return kubeConf
}

const sensitivePrefix = "--$sensitive$--"

// WrapSensitive wraps an argument sent to Command so that it doesn't get logged verbatim. This can
// be used for commands like "telepresence login --apikey NNNN" where the NNNN shouldn't be visible
// in the logs. If NNNN is wrapped using this function, it will appear as "***" in the logs.
func WrapSensitive(s string) string {
	return sensitivePrefix + s
}
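
// Sketch (editor's addition, not part of the original file): pass a secret through
// WrapSensitive so that the debug line logged by Command shows "***" instead of the
// actual value. The apiKey variable is hypothetical.
//
//	cmd := Command(ctx, "telepresence", "login", "--apikey", WrapSensitive(apiKey))
//	err := cmd.Run()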

// Command creates and returns a dexec.Cmd initialized with the global environment
// from the cluster harness and any other environment that has been added using the
// WithEnv() function.
func Command(ctx context.Context, executable string, args ...string) *dexec.Cmd {
	getT(ctx).Helper()
	// Ensure that command has a timestamp and is somewhat readable
	dbgArgs := args
	copied := false
	for i, a := range args {
		if strings.HasPrefix(a, sensitivePrefix) {
			if !copied {
				dbgArgs = make([]string, len(args))
				copy(dbgArgs, args)
				args = make([]string, len(args))
				copy(args, dbgArgs)
				copied = true
			}
			dbgArgs[i] = "***"
			args[i] = strings.TrimPrefix(a, sensitivePrefix)
		}
	}
	dlog.Debug(ctx, "executing ", shellquote.ShellString(filepath.Base(executable), dbgArgs))
	cmd := proc.CommandContext(ctx, executable, args...)
	cmd.DisableLogging = true
	cmd.Env = EnvironMap(ctx).Environ()
	cmd.Dir = GetWorkingDir(ctx)
	cmd.Stdin = dos.Stdin(ctx)
	return cmd
}

func EnvironMap(ctx context.Context) dos.MapEnv {
	env := GetGlobalHarness(ctx).GlobalEnv(ctx)
	maps.Merge(env, getEnv(ctx))
	return env
}

// TelepresenceOk executes the CLI command in a new process and requires the result to be OK.
func TelepresenceOk(ctx context.Context, args ...string) string {
	t := getT(ctx)
	t.Helper()
	stdout, stderr, err := Telepresence(ctx, args...)
	require.NoError(t, err, "telepresence was unable to run, stdout %s", stdout)
	if err == nil {
		if strings.HasPrefix(stderr, "Warning:") && !strings.ContainsRune(stderr, '\n') {
			// Accept warnings, but log them.
			dlog.Warn(ctx, stderr)
		} else {
			assert.Empty(t, stderr, "Expected stderr to be empty, but got: %s", stderr)
		}
	}
	return stdout
}

// Telepresence executes the CLI command in a new process.
func Telepresence(ctx context.Context, args ...string) (string, string, error) {
	t := getT(ctx)
	t.Helper()
	cmd := TelepresenceCmd(ctx, args...)
	stdout := cmd.Stdout.(*strings.Builder)
	stderr := cmd.Stderr.(*strings.Builder)
	err := cmd.Run()
	errStr := strings.TrimSpace(stderr.String())
	if err != nil {
		err = RunError(err, []byte(errStr))
	}
	return strings.TrimSpace(stdout.String()), errStr, err
}
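
// Sketch (editor's addition, not part of the original file): tests normally drive the CLI
// through these helpers; TelepresenceOk fails the test on a non-zero exit, while
// Telepresence returns stdout, stderr, and the error for custom handling.
//
//	TelepresenceOk(ctx, "connect")
//	stdout, stderr, err := Telepresence(ctx, "status")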

// TelepresenceCmd creates a dexec.Cmd using the Command function. Before the command is created,
// the environment is extended with DEV_TELEPRESENCE_CONFIG_DIR from filelocation.AppUserConfigDir
// and DEV_TELEPRESENCE_LOG_DIR from filelocation.AppUserLogDir.
func TelepresenceCmd(ctx context.Context, args ...string) *dexec.Cmd {
	t := getT(ctx)
	t.Helper()

	var stdout, stderr strings.Builder
	ctx = WithEnv(ctx, map[string]string{
		"DEV_TELEPRESENCE_CONFIG_DIR": filelocation.AppUserConfigDir(ctx),
		"DEV_TELEPRESENCE_LOG_DIR":    filelocation.AppUserLogDir(ctx),
	})

	gh := GetGlobalHarness(ctx)
	if len(args) > 0 && (args[0] == "connect") {
		rest := args[1:]
		args = append(make([]string, 0, len(args)+3), args[0])
		if user := GetUser(ctx); user != "default" {
			args = append(args, "--as", "system:serviceaccount:"+user)
		}
		if gh.UserdPProf() > 0 {
			args = append(args, "--userd-profiling-port", strconv.Itoa(int(gh.UserdPProf())))
		}
		if gh.RootdPProf() > 0 {
			args = append(args, "--rootd-profiling-port", strconv.Itoa(int(gh.RootdPProf())))
		}
		args = append(args, rest...)
	}
	exe, _ := gh.Executable()
	cmd := Command(ctx, exe, args...)
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr
	return cmd
}

// TelepresenceDisconnectOk tells telepresence to quit and asserts that the stdout contains the correct output.
func TelepresenceDisconnectOk(ctx context.Context, args ...string) {
	AssertDisconnectOutput(ctx, TelepresenceOk(ctx, append([]string{"quit"}, args...)...))
}

// AssertDisconnectOutput asserts that the stdout contains the correct output from a telepresence quit command.
func AssertDisconnectOutput(ctx context.Context, stdout string) {
	t := getT(ctx)
	assert.True(t, strings.Contains(stdout, "Disconnected") || strings.Contains(stdout, "Not connected"))
	if t.Failed() {
		t.Logf("Disconnect output was %q", stdout)
	}
}

// TelepresenceQuitOk tells telepresence to quit and asserts that the stdout contains the correct output.
func TelepresenceQuitOk(ctx context.Context) {
	AssertQuitOutput(ctx, TelepresenceOk(ctx, "quit", "-s"))
}

// AssertQuitOutput asserts that the stdout contains the correct output from a telepresence quit command.
func AssertQuitOutput(ctx context.Context, stdout string) {
	t := getT(ctx)
	assert.True(t, strings.Contains(stdout, "Telepresence Daemons quitting...done") ||
		strings.Contains(stdout, "Telepresence Daemons have already quit"))
	if t.Failed() {
		t.Logf("Quit output was %q", stdout)
	}
}

// RunError checks if the given err is a *dexec.ExitError, and if so, extracts
// Stderr and the ExitCode from it.
func RunError(err error, out []byte) error {
	if ee, ok := err.(*dexec.ExitError); ok {
		switch {
		case len(ee.Stderr) > 0:
			err = fmt.Errorf("%s, exit code %d", string(ee.Stderr), ee.ExitCode())
		case utf8.ValidString(string(out)):
			err = fmt.Errorf("%s, exit code %d", string(out), ee.ExitCode())
		default:
			err = fmt.Errorf("exit code %d", ee.ExitCode())
		}
	}
	return err
}

// Run runs the given command and arguments and returns an error if the command failed.
func Run(ctx context.Context, exe string, args ...string) error {
	getT(ctx).Helper()
	out, err := Command(ctx, exe, args...).CombinedOutput()
	if err != nil {
		return RunError(err, out)
	}
	return nil
}

// Output runs the given command and arguments and returns its output and an error if the command failed.
func Output(ctx context.Context, exe string, args ...string) (string, error) {
	getT(ctx).Helper()
	cmd := Command(ctx, exe, args...)
	stderr := bytes.Buffer{}
	cmd.Stderr = &stderr
	out, err := cmd.Output()
	if err != nil {
		return string(out), RunError(err, stderr.Bytes())
	}
	return string(out), nil
}

// Kubectl runs kubectl with the default context and the given namespace, or in the default namespace if the given
// namespace is an empty string.
func Kubectl(ctx context.Context, namespace string, args ...string) error {
	getT(ctx).Helper()
	var ks []string
	if namespace != "" {
		ks = append(make([]string, 0, len(args)+2), "--namespace", namespace)
		ks = append(ks, args...)
	} else {
		ks = args
	}
	return Run(ctx, "kubectl", ks...)
}

// KubectlOut runs kubectl with the default context and the given namespace, or in the default namespace if the given
// namespace is an empty string, and returns its output.
func KubectlOut(ctx context.Context, namespace string, args ...string) (string, error) {
	getT(ctx).Helper()
	var ks []string
	if namespace != "" {
		ks = append(make([]string, 0, len(args)+2), "--namespace", namespace)
		ks = append(ks, args...)
	} else {
		ks = args
	}
	return Output(ctx, "kubectl", ks...)
}

func CreateNamespaces(ctx context.Context, namespaces ...string) {
	t := getT(ctx)
	t.Helper()
	wg := sync.WaitGroup{}
	wg.Add(len(namespaces))
	for _, ns := range namespaces {
		go func(ns string) {
			defer wg.Done()
			assert.NoError(t, Kubectl(ctx, "", "create", "namespace", ns), "failed to create namespace %q", ns)
			assert.NoError(t, Kubectl(ctx, "", "label", "namespace", ns, "purpose="+purposeLabel, fmt.Sprintf("app.kubernetes.io/name=%s", ns)))
		}(ns)
	}
	wg.Wait()
}

func DeleteNamespaces(ctx context.Context, namespaces ...string) {
	t := getT(ctx)
	t.Helper()
	wg := sync.WaitGroup{}
	wg.Add(len(namespaces))
	for _, ns := range namespaces {
		if t.Failed() {
			if out, err := KubectlOut(ctx, ns, "get", "events", "--field-selector", "type!=Normal"); err == nil {
				dlog.Debugf(ctx, "Events where type != Normal from namespace %s\n%s", ns, out)
			}
		}
		go func(ns string) {
			defer wg.Done()
			assert.NoError(t, Kubectl(ctx, "", "delete", "namespace", "--wait=false", ns))
		}(ns)
	}
	wg.Wait()
}

// StartLocalHttpEchoServerWithHost is like StartLocalHttpEchoServer but binds to a specific host instead of localhost.
func StartLocalHttpEchoServerWithHost(ctx context.Context, name string, host string) (int, context.CancelFunc) {
	ctx, cancel := context.WithCancel(ctx)
	lc := net.ListenConfig{}
	l, err := lc.Listen(ctx, "tcp", net.JoinHostPort(host, "0"))
	require.NoError(getT(ctx), err, "failed to listen on localhost")
	go func() {
		sc := &dhttp.ServerConfig{
			Handler: http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
				fmt.Fprintf(w, "%s from intercept at %s", name, r.URL.Path)
			}),
		}
		err := sc.Serve(ctx, l)
		if err != nil {
			dlog.Errorf(ctx, "http server on %s exited with error: %v", host, err)
		} else {
			dlog.Errorf(ctx, "http server on %s exited", host)
		}
	}()
	return l.Addr().(*net.TCPAddr).Port, cancel
}

// StartLocalHttpEchoServer starts a local http server that echoes a line with the given name and
// the current URL path. The port is returned together with a function that cancels the server.
func StartLocalHttpEchoServer(ctx context.Context, name string) (int, context.CancelFunc) {
	return StartLocalHttpEchoServerWithHost(ctx, name, "localhost")
}
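
// Sketch (editor's addition, not part of the original file): start a local echo server,
// intercept a service so that its traffic is redirected to the local port, and verify the
// round trip with PingInterceptedEchoServer below. The service name "echo" and service
// port "80" are hypothetical.
//
//	port, cancel := StartLocalHttpEchoServer(ctx, "echo")
//	defer cancel()
//	TelepresenceOk(ctx, "intercept", "echo", "--port", strconv.Itoa(port))
//	PingInterceptedEchoServer(ctx, "echo", "80")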

// PingInterceptedEchoServer assumes that a server has been created using StartLocalHttpEchoServer and
// that an intercept is active for the given svc and svcPort that will redirect to that local server.
func PingInterceptedEchoServer(ctx context.Context, svc, svcPort string, headers ...string) {
	wl := svc
	if slashIdx := strings.IndexByte(svc, '/'); slashIdx > 0 {
		wl = svc[slashIdx+1:]
		svc = svc[:slashIdx]
	}
	expectedOutput := fmt.Sprintf("%s from intercept at /", wl)
	require.Eventually(getT(ctx), func() bool {
		// condition
		ips, err := net.DefaultResolver.LookupIP(ctx, "ip", svc)
		if err != nil {
			dlog.Info(ctx, err)
			return false
		}
		if len(ips) != 1 {
			dlog.Infof(ctx, "Lookup for %s returned %v", svc, ips)
			return false
		}

		hc := http.Client{Timeout: 2 * time.Second}
		rq, err := http.NewRequestWithContext(ctx, http.MethodGet, fmt.Sprintf("http://%s", net.JoinHostPort(ips[0].String(), svcPort)), nil)
		if err != nil {
			dlog.Info(ctx, err)
			return false
		}
		for _, h := range headers {
			kv := strings.SplitN(h, "=", 2)
			rq.Header[kv[0]] = []string{kv[1]}
		}
		resp, err := hc.Do(rq)
		if err != nil {
			dlog.Info(ctx, err)
			return false
		}
		defer resp.Body.Close()
		body, err := io.ReadAll(resp.Body)
		if err != nil {
			dlog.Info(ctx, err)
			return false
		}
		r := string(body)
		if r != expectedOutput {
			dlog.Infof(ctx, "body: %q != %q", r, expectedOutput)
			return false
		}
		return true
	},
		time.Minute,   // waitFor
		5*time.Second, // polling interval
		`body of %q equals %q`, "http://"+svc, expectedOutput,
	)
}

func WithConfig(c context.Context, modifierFunc func(config client.Config)) context.Context {
	// Quit a running daemon. We're changing the directory where its config resides.
	TelepresenceQuitOk(c)

	t := getT(c)
	cfgVal := reflect.ValueOf(client.GetConfig(c)).Elem()
	cfgCopyVal := reflect.New(cfgVal.Type())
	cfgCopyVal.Elem().Set(cfgVal) // By value copy
	configCopy := cfgCopyVal.Interface()
	modifierFunc(configCopy.(client.Config))
	configYaml, err := yaml.Marshal(&configCopy)
	require.NoError(t, err)
	configYamlStr := string(configYaml)
	configDir := t.TempDir()
	c, err = SetConfig(c, configDir, configYamlStr)
	require.NoError(t, err)
	return c
}
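
// Sketch (editor's addition, not part of the original file): adjust a single client
// setting for the remainder of a test; the timeout value shown is arbitrary.
//
//	ctx = WithConfig(ctx, func(cfg client.Config) {
//		cfg.Timeouts().PrivateIntercept = 60 * time.Second
//	})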

func WithKubeConfigExtension(ctx context.Context, extProducer func(*api.Cluster) map[string]any) context.Context {
	kc := KubeConfig(ctx)
	t := getT(ctx)
	cfg, err := clientcmd.LoadFromFile(kc)
	require.NoError(t, err, "unable to read %s", kc)
	cc := cfg.Contexts[cfg.CurrentContext]
	require.NotNil(t, cc, "unable to get current context from config")
	cluster := cfg.Clusters[cc.Cluster]
	require.NotNil(t, cluster, "unable to get current cluster from config")

	raw, err := json.Marshal(extProducer(cluster))
	require.NoError(t, err, "unable to json.Marshal extension map")
	cluster.Extensions = map[string]k8sruntime.Object{"telepresence.io": &k8sruntime.Unknown{Raw: raw}}

	context := *cc
	context.Cluster = "extra"
	cfg = &api.Config{
		Kind:           "Config",
		APIVersion:     "v1",
		Preferences:    api.Preferences{},
		Clusters:       map[string]*api.Cluster{"extra": cluster},
		Contexts:       map[string]*api.Context{"extra": &context},
		CurrentContext: "extra",
	}
	kubeconfigFileName := filepath.Join(t.TempDir(), "kubeconfig")
	require.NoError(t, clientcmd.WriteToFile(*cfg, kubeconfigFileName), "unable to write modified kubeconfig")
	return WithEnv(ctx, map[string]string{"KUBECONFIG": strings.Join([]string{kc, kubeconfigFileName}, string([]byte{os.PathListSeparator}))})
}

func WithKubeConfig(ctx context.Context, cfg *api.Config) context.Context {
	t := getT(ctx)
	kubeconfigFileName := filepath.Join(t.TempDir(), "kubeconfig")
	require.NoError(t, clientcmd.WriteToFile(*cfg, kubeconfigFileName), "unable to write modified kubeconfig")
	return WithEnv(ctx, map[string]string{"KUBECONFIG": kubeconfigFileName})
}

// RunningPods returns the names of running pods with app=<service name>. Running here means
// that at least one container is still running. I.e., the pod might well be terminating
// but still considered running.
func RunningPods(ctx context.Context, svc, ns string) []string {
	out, err := KubectlOut(ctx, ns, "get", "pods", "-o", "json",
		"--field-selector", "status.phase==Running",
		"-l", "app="+svc)
	if err != nil {
		getT(ctx).Log(err.Error())
		return nil
	}
	var pm core.PodList
	if err := json.NewDecoder(strings.NewReader(out)).Decode(&pm); err != nil {
		getT(ctx).Log(err.Error())
		return nil
	}
	pods := make([]string, 0, len(pm.Items))
nextPod:
	for _, pod := range pm.Items {
		for _, cn := range pod.Status.ContainerStatuses {
			if r := cn.State.Running; r != nil && !r.StartedAt.IsZero() {
				// At least one container is still running.
				pods = append(pods, pod.Name)
				continue nextPod
			}
		}
	}
	dlog.Infof(ctx, "Running pods %v", pods)
	return pods
}