github.com/telepresenceio/telepresence/v2@v2.20.0-pro.6.0.20240517030216-236ea954e789/pkg/client/userd/trafficmgr/session.go

package trafficmgr

import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"net"
	"net/http"
	"net/url"
	"os"
	"os/user"
	"slices"
	"sort"
	"strings"
	"sync"
	"time"

	"github.com/blang/semver/v4"
	"go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc"
	"google.golang.org/grpc"
	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/status"
	"google.golang.org/protobuf/types/known/durationpb"
	empty "google.golang.org/protobuf/types/known/emptypb"
	"gopkg.in/yaml.v3"
	core "k8s.io/api/core/v1"
	k8serrors "k8s.io/apimachinery/pkg/api/errors"
	meta "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/client-go/util/homedir"

	"github.com/datawire/dlib/dcontext"
	"github.com/datawire/dlib/dgroup"
	"github.com/datawire/dlib/dlog"
	"github.com/datawire/dlib/dtime"
	"github.com/datawire/k8sapi/pkg/k8sapi"
	"github.com/telepresenceio/telepresence/rpc/v2/authenticator"
	"github.com/telepresenceio/telepresence/rpc/v2/common"
	"github.com/telepresenceio/telepresence/rpc/v2/connector"
	rpc "github.com/telepresenceio/telepresence/rpc/v2/connector"
	rootdRpc "github.com/telepresenceio/telepresence/rpc/v2/daemon"
	"github.com/telepresenceio/telepresence/rpc/v2/manager"
	"github.com/telepresenceio/telepresence/v2/pkg/agentconfig"
	authGrpc "github.com/telepresenceio/telepresence/v2/pkg/authenticator/grpc"
	"github.com/telepresenceio/telepresence/v2/pkg/authenticator/patcher"
	"github.com/telepresenceio/telepresence/v2/pkg/client"
	"github.com/telepresenceio/telepresence/v2/pkg/client/cli/daemon"
	"github.com/telepresenceio/telepresence/v2/pkg/client/k8sclient"
	"github.com/telepresenceio/telepresence/v2/pkg/client/rootd"
	"github.com/telepresenceio/telepresence/v2/pkg/client/scout"
	"github.com/telepresenceio/telepresence/v2/pkg/client/socket"
	"github.com/telepresenceio/telepresence/v2/pkg/client/userd"
	"github.com/telepresenceio/telepresence/v2/pkg/client/userd/k8s"
	"github.com/telepresenceio/telepresence/v2/pkg/dnet"
	"github.com/telepresenceio/telepresence/v2/pkg/errcat"
	"github.com/telepresenceio/telepresence/v2/pkg/iputil"
	"github.com/telepresenceio/telepresence/v2/pkg/matcher"
	"github.com/telepresenceio/telepresence/v2/pkg/proc"
	"github.com/telepresenceio/telepresence/v2/pkg/restapi"
)

type apiServer struct {
	restapi.Server
	cancel context.CancelFunc
}

type apiMatcher struct {
	requestMatcher matcher.Request
	metadata       map[string]string
}
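// session implements userd.Session. It bundles the Kubernetes cluster connection,
// the traffic-manager client, and the link to the root daemon for a single
// "telepresence connect".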
type session struct {
	*k8s.Cluster
	rootDaemon         rootdRpc.DaemonClient
	subnetViaWorkloads []*rootdRpc.SubnetViaWorkload

	// local information
	installID   string // telepresence's install ID
	userAndHost string // "laptop-username@laptop-hostname"

	// Kubernetes Port Forward Dialer
	pfDialer dnet.PortForwardDialer

	// manager client
	managerClient manager.ManagerClient

	// manager client connection
	managerConn *grpc.ClientConn

	// name reported by the manager
	managerName string

	// version reported by the manager
	managerVersion semver.Version

	// The identifier for this daemon
	daemonID *daemon.Identifier

	sessionInfo *manager.SessionInfo // sessionInfo returned by the traffic-manager

	wlWatcher *workloadsAndServicesWatcher

	// currentInterceptsLock ensures that all accesses to currentIntercepts, currentMatchers,
	// currentAPIServers, interceptWaiters, and ingressInfo are synchronized
	currentInterceptsLock sync.Mutex

	// currentIntercepts is the latest snapshot returned by the intercept watcher. It
	// is keyed by the intercept ID
	currentIntercepts map[string]*intercept

	// currentMatchers holds the matchers used when using the APIServer.
	currentMatchers map[string]*apiMatcher

	// currentAPIServers contains the APIServers in use. Typically zero or only one, but since the
	// port is determined by the intercept, there might theoretically be several.
	currentAPIServers map[int]*apiServer

	// Map of desired awaited intercepts. Keyed by intercept name, because it
	// is filled in prior to the intercept being created. Entries are short-lived. They
	// are deleted as soon as the intercept arrives and gets stored in currentIntercepts
	interceptWaiters map[string]*awaitIntercept

	ingressInfo []*manager.IngressInfo

	isPodDaemon bool

	sessionConfig client.Config

	// done is closed when the session ends
	done chan struct{}

	// Possibly extended version of the session. Use when calling interface methods.
	self userd.Session
}

func NewSession(
	ctx context.Context,
	cr *rpc.ConnectRequest,
	config *client.Kubeconfig,
) (_ context.Context, _ userd.Session, info *connector.ConnectInfo) {
	dlog.Info(ctx, "-- Starting new session")

	connectStart := time.Now()
	defer func() {
		if info.Error == connector.ConnectInfo_UNSPECIFIED {
			scout.Report(ctx, "connect",
				scout.Entry{
					Key:   "time_to_connect",
					Value: time.Since(connectStart).Seconds(),
				}, scout.Entry{
					Key:   "mapped_namespaces",
					Value: len(cr.MappedNamespaces),
				})
		} else {
			scout.Report(ctx, "connect_error",
				scout.Entry{
					Key:   "error",
					Value: info.ErrorText,
				}, scout.Entry{
					Key:   "error_type",
					Value: info.Error.String(),
				}, scout.Entry{
					Key:   "error_category",
					Value: info.ErrorCategory,
				}, scout.Entry{
					Key:   "time_to_fail",
					Value: time.Since(connectStart).Seconds(),
				}, scout.Entry{
					Key:   "mapped_namespaces",
					Value: len(cr.MappedNamespaces),
				})
		}
	}()

	dlog.Info(ctx, "Connecting to k8s cluster...")
	cluster, err := k8s.ConnectCluster(ctx, cr, config)
	if err != nil {
		dlog.Errorf(ctx, "unable to track k8s cluster: %+v", err)
		return ctx, nil, connectError(rpc.ConnectInfo_CLUSTER_FAILED, err)
	}
	dlog.Infof(ctx, "Connected to context %s, namespace %s (%s)", cluster.Context, cluster.Namespace, cluster.Server)

	ctx = cluster.WithK8sInterface(ctx)
	scout.SetMetadatum(ctx, "cluster_id", cluster.GetClusterId(ctx))

	dlog.Info(ctx, "Connecting to traffic manager...")
	tmgr, err := connectMgr(ctx, cluster, scout.InstallID(ctx), cr)
	if err != nil {
		dlog.Errorf(ctx, "Unable to connect to session: %s", err)
		return ctx, nil, connectError(rpc.ConnectInfo_TRAFFIC_MANAGER_FAILED, err)
	}

	// store session in ctx for reporting
	ctx = scout.WithSession(ctx, tmgr)
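	// The session's effective client config is layered: compiled-in defaults first, then
	// any config pushed by the traffic-manager, and finally the user's local config
	// (see ApplyConfig).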
	tmgr.sessionConfig = client.GetDefaultConfig()
	cliCfg, err := tmgr.managerClient.GetClientConfig(ctx, &empty.Empty{})
	if err != nil {
		if status.Code(err) != codes.Unimplemented {
			dlog.Warnf(ctx, "Failed to get remote config from traffic manager: %v", err)
		}
	} else {
		if err := yaml.Unmarshal(cliCfg.ConfigYaml, tmgr.sessionConfig); err != nil {
			dlog.Warnf(ctx, "Failed to deserialize remote config: %v", err)
		}
		if err := tmgr.ApplyConfig(ctx); err != nil {
			dlog.Warnf(ctx, "failed to apply config from traffic-manager: %v", err)
		}
		if err := cluster.AddRemoteKubeConfigExtension(ctx, cliCfg.ConfigYaml); err != nil {
			dlog.Warnf(ctx, "Failed to set remote kubeconfig values: %v", err)
		}
	}
	ctx = dnet.WithPortForwardDialer(ctx, tmgr.pfDialer)

	oi := tmgr.getOutboundInfo(ctx, cr)
	if !userd.GetService(ctx).RootSessionInProcess() {
		// Connect to the root daemon if it is running. It's the CLI that starts it initially
		rootRunning, err := socket.IsRunning(ctx, socket.RootDaemonPath(ctx))
		if err != nil {
			return ctx, nil, connectError(rpc.ConnectInfo_DAEMON_FAILED, err)
		}
		if !rootRunning {
			return ctx, nil, connectError(rpc.ConnectInfo_DAEMON_FAILED, errors.New("root daemon is not running"))
		}

		if client.GetConfig(ctx).Cluster().ConnectFromRootDaemon {
			// Root daemon needs this to authenticate with the cluster. Potential exec configurations in the kubeconfig
			// must be executed by the user, not by root.
			konfig, err := patcher.CreateExternalKubeConfig(ctx, config.ClientConfig, cluster.Context, func([]string) (string, string, error) {
				s := userd.GetService(ctx)
				if _, ok := s.Server().GetServiceInfo()[authenticator.Authenticator_ServiceDesc.ServiceName]; !ok {
					authGrpc.RegisterAuthenticatorServer(s.Server(), config.ClientConfig)
				}
				return client.GetExe(ctx), s.ListenerAddress(ctx), nil
			}, nil)
			if err != nil {
				return ctx, nil, connectError(rpc.ConnectInfo_DAEMON_FAILED, err)
			}
			patcher.AnnotateOutboundInfo(ctx, oi, konfig.CurrentContext)
		}
	}

	tmgr.rootDaemon, err = tmgr.connectRootDaemon(ctx, oi, cr.IsPodDaemon)
	if err != nil {
		tmgr.managerConn.Close()
		return ctx, nil, connectError(rpc.ConnectInfo_DAEMON_FAILED, err)
	}

	// Collect data on how long connection time took
	dlog.Debug(ctx, "Finished connecting to traffic manager")

	tmgr.AddNamespaceListener(ctx, tmgr.updateDaemonNamespaces)
	return ctx, tmgr, tmgr.status(ctx, true)
}

// SetSelf is for internal use by extensions.
func (s *session) SetSelf(self userd.Session) {
	s.self = self
}
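// A typical driver of a session (illustrative sketch only; the real call sites live in
// the userd service, connectRequest and kubeConfig are placeholders, and it is assumed
// that userd.Session exposes RunSession as *session does):
//
//	ctx, sess, info := NewSession(ctx, connectRequest, kubeConfig)
//	if info.Error == connector.ConnectInfo_UNSPECIFIED {
//		err = sess.RunSession(ctx)
//	}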
// RunSession (1) starts up by ensuring that the manager is installed and running,
// but then for most of its life
//   - (2) calls manager.ArriveAsClient and then periodically calls manager.Remain
//   - runs the intercepts (manager.WatchIntercepts) and then
//   - (3) listens on the appropriate local ports and forwards them to the intercepted
//     Services, and
//   - (4) mounts the appropriate remote volumes.
func (s *session) RunSession(c context.Context) error {
	self := s.self
	g := dgroup.NewGroup(c, dgroup.GroupConfig{})
	defer func() {
		self.Epilog(c)
	}()
	self.StartServices(g)
	return g.Wait()
}

func (s *session) RootDaemon() rootdRpc.DaemonClient {
	return s.rootDaemon
}

func (s *session) ManagerClient() manager.ManagerClient {
	return s.managerClient
}

func (s *session) ManagerConn() *grpc.ClientConn {
	return s.managerConn
}

func (s *session) ManagerName() string {
	return s.managerName
}

func (s *session) ManagerVersion() semver.Version {
	return s.managerVersion
}

func (s *session) getSessionConfig() client.Config {
	return s.sessionConfig
}

// connectMgr returns a session for the given cluster that is connected to the traffic-manager.
func connectMgr(
	ctx context.Context,
	cluster *k8s.Cluster,
	installID string,
	cr *rpc.ConnectRequest,
) (*session, error) {
	tos := client.GetConfig(ctx).Timeouts()

	ctx, cancel := tos.TimeoutContext(ctx, client.TimeoutTrafficManagerConnect)
	defer cancel()

	userinfo, err := user.Current()
	if err != nil {
		return nil, fmt.Errorf("unable to obtain current user: %w", err)
	}
	host, err := os.Hostname()
	if err != nil {
		return nil, fmt.Errorf("unable to obtain hostname: %w", err)
	}

	err = CheckTrafficManagerService(ctx, cluster.GetManagerNamespace())
	if err != nil {
		return nil, err
	}

	dlog.Debug(ctx, "creating port-forward")
	pfDialer, err := dnet.NewK8sPortForwardDialer(ctx, cluster.Kubeconfig.RestConfig, k8sapi.GetK8sInterface(ctx))
	if err != nil {
		return nil, err
	}
	conn, mClient, vi, err := k8sclient.ConnectToManager(ctx, cluster.GetManagerNamespace(), pfDialer.Dial)
	if err != nil {
		return nil, err
	}
	managerVersion, err := semver.Parse(strings.TrimPrefix(vi.Version, "v"))
	if err != nil {
		return nil, fmt.Errorf("unable to parse manager.Version: %w", err)
	}

	userAndHost := fmt.Sprintf("%s@%s", userinfo.Username, host)

	daemonID, err := daemon.NewIdentifier(cr.Name, cluster.Context, cluster.Namespace, proc.RunningInContainer())
	if err != nil {
		return nil, err
	}
	si, err := LoadSessionInfoFromUserCache(ctx, daemonID)
	if err != nil {
		return nil, err
	}

	svc := userd.GetService(ctx)
	if si != nil {
		// Check if the session is still valid in the traffic-manager by calling Remain
		_, err = mClient.Remain(ctx, &manager.RemainRequest{Session: si})
		if err == nil {
			if ctx.Err() != nil {
				// Call timed out, so the traffic-manager isn't responding at all
				return nil, ctx.Err()
			}
			dlog.Debugf(ctx, "traffic-manager port-forward established, client was already known to the traffic-manager as %q", userAndHost)
		} else {
			si = nil
		}
	}

	if si == nil {
		dlog.Debugf(ctx, "traffic-manager port-forward established, making client known to the traffic-manager as %q", userAndHost)
		si, err = mClient.ArriveAsClient(ctx, &manager.ClientInfo{
			Name:      userAndHost,
			Namespace: cluster.Namespace,
			InstallId: installID,
			Product:   "telepresence",
			Version:   client.Version(),
		})
		if err != nil {
			return nil, client.CheckTimeout(ctx, fmt.Errorf("manager.ArriveAsClient: %w", err))
		}
		if err = SaveSessionInfoToUserCache(ctx, daemonID, si); err != nil {
			return nil, err
		}
	}
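	// Propagate the locally configured gRPC maximum receive size to every call made
	// through the shared manager client registered with the userd service below.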
	var opts []grpc.CallOption
	cfg := client.GetConfig(ctx)
	if mz := cfg.Grpc().MaxReceiveSize(); mz > 0 {
		opts = append(opts, grpc.MaxCallRecvMsgSize(int(mz)))
	}
	svc.SetManagerClient(mClient, opts...)

	managerName := vi.Name
	if managerName == "" {
		// Older traffic-managers don't distinguish between OSS and pro versions
		managerName = "Traffic Manager"
	}

	extraAlsoProxy, err := parseCIDR(cr.GetAlsoProxy())
	if err != nil {
		return nil, fmt.Errorf("failed to parse extra also proxy: %w", err)
	}

	extraNeverProxy, err := parseCIDR(cr.GetNeverProxy())
	if err != nil {
		return nil, fmt.Errorf("failed to parse extra never proxy: %w", err)
	}

	extraAllow, err := parseCIDR(cr.GetAllowConflictingSubnets())
	if err != nil {
		return nil, fmt.Errorf("failed to parse extra allow conflicting subnets: %w", err)
	}

	cluster.AlsoProxy = append(cluster.AlsoProxy, extraAlsoProxy...)
	cluster.NeverProxy = append(cluster.NeverProxy, extraNeverProxy...)
	cluster.AllowConflictingSubnets = append(cluster.AllowConflictingSubnets, extraAllow...)

	sess := &session{
		Cluster:            cluster,
		installID:          installID,
		daemonID:           daemonID,
		userAndHost:        userAndHost,
		managerClient:      mClient,
		managerConn:        conn,
		pfDialer:           pfDialer,
		managerName:        managerName,
		managerVersion:     managerVersion,
		sessionInfo:        si,
		interceptWaiters:   make(map[string]*awaitIntercept),
		wlWatcher:          newWASWatcher(),
		isPodDaemon:        cr.IsPodDaemon,
		done:               make(chan struct{}),
		subnetViaWorkloads: cr.SubnetViaWorkloads,
	}
	sess.self = sess
	return sess, nil
}

func (s *session) NewRemainRequest() *manager.RemainRequest {
	return &manager.RemainRequest{Session: s.SessionInfo()}
}

func (s *session) Remain(ctx context.Context) error {
	self := s.self
	ctx, cancel := client.GetConfig(ctx).Timeouts().TimeoutContext(ctx, client.TimeoutTrafficManagerAPI)
	defer cancel()
	_, err := self.ManagerClient().Remain(ctx, self.NewRemainRequest())
	if err != nil {
		if status.Code(err) == codes.NotFound {
			// Session has expired. We need to cancel the owner session and reconnect
			return ErrSessionExpired
		}
		dlog.Errorf(ctx, "error calling Remain: %v", client.CheckTimeout(ctx, err))
	}
	return nil
}

func parseCIDR(cidr []string) ([]*iputil.Subnet, error) {
	result := make([]*iputil.Subnet, 0)

	if cidr == nil {
		return result, nil
	}

	for i := range cidr {
		_, ipNet, err := net.ParseCIDR(cidr[i])
		if err != nil {
			return nil, fmt.Errorf("failed to parse CIDR %s: %w", cidr[i], err)
		}
		result = append(result, (*iputil.Subnet)(ipNet))
	}

	return result, nil
}
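// For reference, a minimal (hypothetical) use of parseCIDR, mirroring how connectMgr
// above converts the CIDR strings from the ConnectRequest:
//
//	subnets, err := parseCIDR([]string{"10.0.0.0/8", "192.168.1.0/24"})
//	if err != nil {
//		// one of the strings was not a valid CIDR
//	}
//	_ = subnets // []*iputil.Subnet

// CheckTrafficManagerService verifies that the traffic-manager service exists in the
// given namespace, and returns a user-friendly error when it does not.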
func CheckTrafficManagerService(ctx context.Context, namespace string) error {
	dlog.Debug(ctx, "checking that traffic-manager exists")
	coreV1 := k8sapi.GetK8sInterface(ctx).CoreV1()
	if _, err := coreV1.Services(namespace).Get(ctx, "traffic-manager", meta.GetOptions{}); err != nil {
		msg := fmt.Sprintf("unable to get service traffic-manager in %s: %v", namespace, err)
		se := &k8serrors.StatusError{}
		if errors.As(err, &se) {
			if se.Status().Code == http.StatusNotFound {
				msg = "traffic manager not found, if it is not installed, please run 'telepresence helm install'. " +
					"If it is installed, try connecting with a --manager-namespace to point telepresence to the namespace it's installed in."
			}
		}
		return errcat.User.New(msg)
	}
	return nil
}

func connectError(t rpc.ConnectInfo_ErrType, err error) *rpc.ConnectInfo {
	st := status.Convert(err)
	for _, detail := range st.Details() {
		if detail, ok := detail.(*common.Result); ok {
			return &rpc.ConnectInfo{
				Error:         t,
				ErrorText:     string(detail.Data),
				ErrorCategory: int32(detail.ErrorCategory),
			}
		}
	}
	return &rpc.ConnectInfo{
		Error:         t,
		ErrorText:     err.Error(),
		ErrorCategory: int32(errcat.GetCategory(err)),
	}
}

// updateDaemonNamespaces will create a new DNS search path from the given namespaces and
// send it to the DNS-resolver in the daemon.
func (s *session) updateDaemonNamespaces(c context.Context) {
	const svcDomain = "svc"

	s.wlWatcher.setNamespacesToWatch(c, s.GetCurrentNamespaces(true))

	domains := s.GetCurrentNamespaces(false)
	if !slices.Contains(domains, svcDomain) {
		domains = append(domains, svcDomain)
	}
	dlog.Debugf(c, "posting top-level domains %v to root daemon", domains)

	if _, err := s.rootDaemon.SetDNSTopLevelDomains(c, &rootdRpc.Domains{Domains: domains}); err != nil {
		dlog.Errorf(c, "error posting domains %v to root daemon: %v", domains, err)
	}
	dlog.Debug(c, "domains posted successfully")
}

func (s *session) Epilog(ctx context.Context) {
	_, _ = s.rootDaemon.Disconnect(ctx, &empty.Empty{})
	_ = s.pfDialer.Close()
	dlog.Info(ctx, "-- Session ended")
	close(s.done)
}

func (s *session) StartServices(g *dgroup.Group) {
	g.Go("remain", s.remainLoop)
	g.Go("intercept-port-forward", s.watchInterceptsHandler)
	g.Go("dial-request-watcher", s.dialRequestWatcher)
}

func runWithRetry(ctx context.Context, f func(context.Context) error) error {
	backoff := 100 * time.Millisecond
	for ctx.Err() == nil {
		if err := f(ctx); err != nil {
			dlog.Error(ctx, err)
			dtime.SleepWithContext(ctx, backoff)
			backoff *= 2
			if backoff > 3*time.Second {
				backoff = 3 * time.Second
			}
		}
	}
	return nil
}

func (s *session) Done() <-chan struct{} {
	return s.done
}

func (s *session) SessionInfo() *manager.SessionInfo {
	return s.sessionInfo
}
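// ApplyConfig reloads the local client config and merges it on top of this session's
// current config (which may already contain values pushed by the traffic-manager). If
// no namespaces were mapped explicitly, it also picks up mapped namespaces from the
// cluster config.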
func (s *session) ApplyConfig(ctx context.Context) error {
	cfg, err := client.LoadConfig(ctx)
	if err != nil {
		return err
	}
	err = client.MergeAndReplace(ctx, s.sessionConfig, cfg, false)
	if err != nil {
		return err
	}
	if len(s.MappedNamespaces) == 0 {
		mns := client.GetConfig(ctx).Cluster().MappedNamespaces
		if len(mns) > 0 {
			s.SetMappedNamespaces(ctx, mns)
		}
	}
	return err
}

// getInfosForWorkloads returns a list of workloads found in the given namespaces that fulfill the given filter criteria.
func (s *session) getInfosForWorkloads(
	ctx context.Context,
	namespaces []string,
	iMap map[string][]*manager.InterceptInfo,
	sMap map[string]*rpc.WorkloadInfo_Sidecar,
	filter rpc.ListRequest_Filter,
) []*rpc.WorkloadInfo {
	wiMap := make(map[types.UID]*rpc.WorkloadInfo)
	s.wlWatcher.eachService(ctx, s.GetManagerNamespace(), namespaces, func(svc *core.Service) {
		wls, err := s.wlWatcher.findMatchingWorkloads(ctx, svc)
		if err != nil {
			return
		}
		for _, workload := range wls {
			serviceUID := string(svc.UID)

			if wlInfo, ok := wiMap[workload.GetUID()]; ok {
				if _, ok := wlInfo.Services[serviceUID]; !ok {
					wlInfo.Services[serviceUID] = &rpc.WorkloadInfo_ServiceReference{
						Name:      svc.Name,
						Namespace: svc.Namespace,
						Ports:     getServicePorts(svc),
					}
				}
				continue
			}

			name := workload.GetName()
			dlog.Debugf(ctx, "Getting info for %s %s.%s, matching service %s.%s", workload.GetKind(), name, workload.GetNamespace(), svc.Name, svc.Namespace)

			wlInfo := &rpc.WorkloadInfo{
				Name:                 name,
				Namespace:            workload.GetNamespace(),
				WorkloadResourceType: workload.GetKind(),
				Uid:                  string(workload.GetUID()),
				Services: map[string]*rpc.WorkloadInfo_ServiceReference{
					string(svc.UID): {
						Name:      svc.Name,
						Namespace: svc.Namespace,
						Ports:     getServicePorts(svc),
					},
				},
			}
			var ok bool
			if wlInfo.InterceptInfos, ok = iMap[name]; !ok && filter <= rpc.ListRequest_INTERCEPTS {
				continue
			}
			if wlInfo.Sidecar, ok = sMap[name]; !ok && filter <= rpc.ListRequest_INSTALLED_AGENTS {
				continue
			}
			wiMap[workload.GetUID()] = wlInfo
		}
	})
	wiz := make([]*rpc.WorkloadInfo, len(wiMap))
	i := 0
	for _, wi := range wiMap {
		wiz[i] = wi
		i++
	}
	sort.Slice(wiz, func(i, j int) bool { return wiz[i].Name < wiz[j].Name })
	return wiz
}

func getServicePorts(svc *core.Service) []*rpc.WorkloadInfo_ServiceReference_Port {
	ports := make([]*rpc.WorkloadInfo_ServiceReference_Port, len(svc.Spec.Ports))
	for i, p := range svc.Spec.Ports {
		ports[i] = &rpc.WorkloadInfo_ServiceReference_Port{
			Name: p.Name,
			Port: p.Port,
		}
	}
	return ports
}

func (s *session) waitForSync(ctx context.Context) {
	s.wlWatcher.setNamespacesToWatch(ctx, s.GetCurrentNamespaces(true))
	s.wlWatcher.waitForSync(ctx)
}
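// WatchWorkloads streams a fresh WorkloadInfoSnapshot to the caller every time the
// workload watcher publishes a new snapshot for the watched namespaces. It returns
// when either the session's context or the stream's context is cancelled.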
func (s *session) WatchWorkloads(c context.Context, wr *rpc.WatchWorkloadsRequest, stream userd.WatchWorkloadsStream) error {
	s.waitForSync(c)
	s.ensureWatchers(c, wr.Namespaces)
	sCtx, sCancel := context.WithCancel(c)
	// We need to make sure the subscription ends when we leave this method, since this is the one consuming the snapshotAvailable channel.
	// Otherwise, the goroutine that writes to the channel will leak.
	defer sCancel()
	snapshotAvailable := s.wlWatcher.subscribe(sCtx)
	for {
		select {
		case <-c.Done(): // if context is done (usually the session's context).
			return nil
		case <-stream.Context().Done(): // if stream context is done.
			return nil
		case <-snapshotAvailable:
			snapshot, err := s.workloadInfoSnapshot(c, wr.GetNamespaces(), rpc.ListRequest_INTERCEPTABLE)
			if err != nil {
				return status.Errorf(codes.Unavailable, "failed to create WorkloadInfoSnapshot: %v", err)
			}
			if err := stream.Send(snapshot); err != nil {
				dlog.Errorf(c, "WatchWorkloads.Send() failed: %v", err)
				return err
			}
		}
	}
}

func (s *session) WorkloadInfoSnapshot(
	ctx context.Context,
	namespaces []string,
	filter rpc.ListRequest_Filter,
) (*rpc.WorkloadInfoSnapshot, error) {
	s.waitForSync(ctx)
	return s.workloadInfoSnapshot(ctx, namespaces, filter)
}

func (s *session) ensureWatchers(ctx context.Context,
	namespaces []string,
) {
	dlog.Debugf(ctx, "Ensure watchers %v", namespaces)
	wg := sync.WaitGroup{}
	wg.Add(len(namespaces))
	for _, ns := range namespaces {
		if ns == "" {
			ns = s.Namespace
		}
		wgp := &wg
		s.wlWatcher.ensureStarted(ctx, ns, func(started bool) {
			if started {
				dlog.Debugf(ctx, "watchers for %s started", ns)
			}
			if wgp != nil {
				wgp.Done()
				wgp = nil
			}
		})
	}
	wg.Wait()
}

func (s *session) workloadInfoSnapshot(
	ctx context.Context,
	namespaces []string,
	filter rpc.ListRequest_Filter,
) (*rpc.WorkloadInfoSnapshot, error) {
	is := s.getCurrentIntercepts()
	s.ensureWatchers(ctx, namespaces)

	var nss []string
	if filter == rpc.ListRequest_INTERCEPTS {
		// Special case, we don't care about namespaces in general. Instead, we use the connected namespace
		nss = []string{s.Namespace}
	} else {
		nss = make([]string, 0, len(namespaces))
		for _, ns := range namespaces {
			ns = s.ActualNamespace(ns)
			if ns != "" {
				nss = append(nss, ns)
			}
		}
	}
	if len(nss) == 0 {
		// none of the namespaces are currently mapped
		return &rpc.WorkloadInfoSnapshot{}, nil
	}

	iMap := make(map[string][]*manager.InterceptInfo, len(is))
nextIs:
	for _, i := range is {
		for _, ns := range nss {
			if i.Spec.Namespace == ns {
				iMap[i.Spec.Agent] = append(iMap[i.Spec.Agent], i.InterceptInfo)
				continue nextIs
			}
		}
	}

	sMap := make(map[string]*rpc.WorkloadInfo_Sidecar)
	for _, ns := range nss {
		for k, v := range s.getCurrentSidecarsInNamespace(ctx, ns) {
			data, err := json.Marshal(v)
			if err != nil {
				continue
			}
			sMap[k] = &rpc.WorkloadInfo_Sidecar{Json: data}
		}
	}

	workloadInfos := s.getInfosForWorkloads(ctx, nss, iMap, sMap, filter)
	return &rpc.WorkloadInfoSnapshot{Workloads: workloadInfos}, nil
}
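// ErrSessionExpired is returned by Remain (and propagated by remainLoop) when the
// traffic-manager no longer recognizes the session, prompting the owner of the session
// to reconnect. A hypothetical caller-side check:
//
//	if errors.Is(err, ErrSessionExpired) {
//		// tear down this session and establish a new one
//	}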
var ErrSessionExpired = errors.New("session expired")

func (s *session) remainLoop(c context.Context) error {
	ticker := time.NewTicker(5 * time.Second)
	defer func() {
		ticker.Stop()
		c = dcontext.WithoutCancel(c)
		c, cancel := context.WithTimeout(c, 3*time.Second)
		defer cancel()
		if _, err := s.managerClient.Depart(c, s.SessionInfo()); err != nil {
			dlog.Errorf(c, "failed to depart from manager: %v", err)
		} else {
			// Depart succeeded so the traffic-manager has dropped the session. We should too.
			if err = DeleteSessionInfoFromUserCache(c, s.daemonID); err != nil {
				dlog.Errorf(c, "failed to delete session from user cache: %v", err)
			}
		}
		s.managerConn.Close()
	}()

	for {
		select {
		case <-c.Done():
			return nil
		case <-ticker.C:
			if err := s.Remain(c); err != nil {
				return err
			}
		}
	}
}

func (s *session) UpdateStatus(c context.Context, cr *rpc.ConnectRequest) *rpc.ConnectInfo {
	config, err := client.DaemonKubeconfig(c, cr)
	if err != nil {
		return connectError(rpc.ConnectInfo_CLUSTER_FAILED, err)
	}

	if !cr.IsPodDaemon {
		envEQ := true
		for k, v := range cr.Environment {
			if k[0] == '-' {
				if _, ok := os.LookupEnv(k[1:]); ok {
					envEQ = false
					break
				}
			} else {
				if ov, ok := os.LookupEnv(k); !ok || ov != v {
					envEQ = false
					break
				}
			}
		}
		if !(envEQ && s.Kubeconfig.ContextServiceAndFlagsEqual(config)) {
			return &rpc.ConnectInfo{
				Error:            rpc.ConnectInfo_MUST_RESTART,
				ClusterContext:   s.Kubeconfig.Context,
				ClusterServer:    s.Kubeconfig.Server,
				ClusterId:        s.GetClusterId(c),
				ManagerInstallId: s.GetManagerInstallId(c),
			}
		}
	}

	namespaces := cr.MappedNamespaces
	if len(namespaces) == 1 && namespaces[0] == "all" {
		namespaces = nil
	}
	if len(namespaces) == 0 {
		namespaces = client.GetConfig(c).Cluster().MappedNamespaces
	}

	if s.SetMappedNamespaces(c, namespaces) {
		if len(namespaces) == 0 && k8sclient.CanWatchNamespaces(c) {
			s.StartNamespaceWatcher(c)
		}
		s.currentInterceptsLock.Lock()
		s.ingressInfo = nil
		s.currentInterceptsLock.Unlock()
	}
	s.subnetViaWorkloads = cr.SubnetViaWorkloads
	return s.Status(c)
}

func (s *session) Status(c context.Context) *rpc.ConnectInfo {
	return s.status(c, false)
}

func (s *session) status(c context.Context, initial bool) *rpc.ConnectInfo {
	cfg := s.Kubeconfig
	ret := &rpc.ConnectInfo{
		ClusterContext:     cfg.Context,
		ClusterServer:      cfg.Server,
		ClusterId:          s.GetClusterId(c),
		ManagerInstallId:   s.GetManagerInstallId(c),
		SessionInfo:        s.SessionInfo(),
		ConnectionName:     s.daemonID.Name,
		KubeFlags:          s.OriginalFlagMap,
		Namespace:          s.Namespace,
		Intercepts:         &manager.InterceptInfoSnapshot{Intercepts: s.getCurrentInterceptInfos()},
		ManagerNamespace:   cfg.GetManagerNamespace(),
		SubnetViaWorkloads: s.subnetViaWorkloads,
		Version: &common.VersionInfo{
			ApiVersion: client.APIVersion,
			Version:    client.Version(),
			Executable: client.GetExe(c),
			Name:       client.DisplayName,
		},
	}
	if !initial {
		ret.Error = rpc.ConnectInfo_ALREADY_CONNECTED
	}
	if len(s.MappedNamespaces) > 0 || len(s.sessionConfig.Cluster().MappedNamespaces) > 0 {
		ret.MappedNamespaces = s.GetCurrentNamespaces(true)
	}
	var err error
	ret.DaemonStatus, err = s.rootDaemon.Status(c, &empty.Empty{})
	if err != nil {
		return connectError(rpc.ConnectInfo_DAEMON_FAILED, err)
	}
	return ret
}

// Uninstall one or all traffic-agents from the cluster if the client has sufficient credentials to do so.
//
// Uninstalling all or specific agents requires that the client can get and update the agents ConfigMap.
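//
// Illustrative request shapes (hypothetical caller code; the field names match the
// handling below, and "echo" is a placeholder workload name):
//
//	// remove the agent from a single workload in the connected namespace
//	_, _ = s.Uninstall(ctx, &rpc.UninstallRequest{
//		UninstallType: rpc.UninstallRequest_NAMED_AGENTS,
//		Agents:        []string{"echo"},
//	})
//
//	// remove all agents in all mapped namespaces
//	_, _ = s.Uninstall(ctx, &rpc.UninstallRequest{UninstallType: rpc.UninstallRequest_ALL_AGENTS})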
func (s *session) Uninstall(ctx context.Context, ur *rpc.UninstallRequest) (*common.Result, error) {
	api := k8sapi.GetK8sInterface(ctx).CoreV1()
	loadAgentConfigMap := func(ns string) (*core.ConfigMap, error) {
		cm, err := api.ConfigMaps(ns).Get(ctx, agentconfig.ConfigMap, meta.GetOptions{})
		if err != nil {
			if k8serrors.IsNotFound(err) {
				// there are no agents to remove
				return nil, nil
			}
			// TODO: find out if this is due to lack of access credentials and if so, report using errcat.User with more meaningful message
			return nil, err
		}
		return cm, nil
	}

	updateAgentConfigMap := func(ns string, cm *core.ConfigMap) error {
		_, err := api.ConfigMaps(ns).Update(ctx, cm, meta.UpdateOptions{})
		return err
	}

	// Removal of agents requested. We need the agents ConfigMap in order to do that.
	// This removal is deliberately done in the client instead of the traffic-manager so that RBAC can be configured
	// to prevent the clients from doing it.
	if ur.UninstallType == rpc.UninstallRequest_NAMED_AGENTS {
		// must have a valid namespace in order to uninstall named agents
		s.waitForSync(ctx)
		if ur.Namespace == "" {
			ur.Namespace = s.Namespace
		}
		s.wlWatcher.ensureStarted(ctx, ur.Namespace, nil)
		namespace := s.ActualNamespace(ur.Namespace)
		if namespace == "" {
			// namespace is not mapped
			return errcat.ToResult(errcat.User.Newf("namespace %s is not mapped", ur.Namespace)), nil
		}
		cm, err := loadAgentConfigMap(namespace)
		if err != nil || cm == nil {
			return errcat.ToResult(err), nil
		}
		changed := false
		ics := s.getCurrentIntercepts()
		for _, an := range ur.Agents {
			for _, ic := range ics {
				if ic.Spec.Namespace == namespace && ic.Spec.Agent == an {
					_ = s.removeIntercept(ctx, ic)
					break
				}
			}
			if _, ok := cm.Data[an]; ok {
				delete(cm.Data, an)
				changed = true
			}
		}
		if changed {
			return errcat.ToResult(updateAgentConfigMap(namespace, cm)), nil
		}
		return errcat.ToResult(nil), nil
	}
	if ur.UninstallType != rpc.UninstallRequest_ALL_AGENTS {
		return nil, status.Error(codes.InvalidArgument, "invalid uninstall request")
	}

	_ = s.ClearIntercepts(ctx)
	clearAgentsConfigMap := func(ns string) error {
		cm, err := loadAgentConfigMap(ns)
		if err != nil {
			return err
		}
		if cm == nil {
			return nil
		}
		if len(cm.Data) > 0 {
			cm.Data = nil
			return updateAgentConfigMap(ns, cm)
		}
		return nil
	}

	if ur.Namespace != "" {
		s.waitForSync(ctx)
		s.wlWatcher.ensureStarted(ctx, ur.Namespace, nil)
		namespace := s.ActualNamespace(ur.Namespace)
		if namespace == "" {
			// namespace is not mapped
			return errcat.ToResult(errcat.User.Newf("namespace %s is not mapped", ur.Namespace)), nil
		}
		return errcat.ToResult(clearAgentsConfigMap(namespace)), nil
	} else {
		// Load all affected ConfigMaps
		for _, ns := range s.GetCurrentNamespaces(true) {
			err := clearAgentsConfigMap(ns)
			if err != nil {
				return errcat.ToResult(err), nil
			}
		}
	}
	return errcat.ToResult(nil), nil
}
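// getOutboundInfo assembles the OutboundInfo handed to the root daemon: the session,
// never-proxy subnets (including host routes for the API server), DNS configuration,
// also-proxy subnets, and any allowed conflicting subnets.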
func (s *session) getOutboundInfo(ctx context.Context, cr *rpc.ConnectRequest) *rootdRpc.OutboundInfo {
	// We'll figure out the IP address of the API server(s) so that we can tell the daemon never to proxy them.
	// This is because in some setups the API server will be in the same CIDR range as the pods, and the
	// daemon will attempt to proxy traffic to it. This usually results in a loss of all traffic to/from
	// the cluster, since an open tunnel to the traffic-manager (via the API server) is itself required
	// to communicate with the cluster.
	neverProxy := make([]*manager.IPNet, 0, 1+len(s.NeverProxy))
	serverURL, err := url.Parse(s.Server)
	if err != nil {
		// This really shouldn't happen as we are connected to the server
		dlog.Errorf(ctx, "Unable to parse url for k8s server %s: %v", s.Server, err)
	} else {
		hostname := serverURL.Hostname()
		rawIP := iputil.Parse(hostname)
		ips := []net.IP{rawIP}
		if rawIP == nil {
			var err error
			ips, err = net.LookupIP(hostname)
			if err != nil {
				dlog.Errorf(ctx, "Unable to do DNS lookup for k8s server %s: %v", hostname, err)
				ips = []net.IP{}
			}
		}
		for _, ip := range ips {
			mask := net.CIDRMask(128, 128)
			if ipv4 := ip.To4(); ipv4 != nil {
				mask = net.CIDRMask(32, 32)
				ip = ipv4
			}
			if !ip.IsLoopback() {
				ipnet := &net.IPNet{IP: ip, Mask: mask}
				neverProxy = append(neverProxy, iputil.IPNetToRPC(ipnet))
			}
		}
	}
	for _, np := range s.NeverProxy {
		neverProxy = append(neverProxy, iputil.IPNetToRPC((*net.IPNet)(np)))
	}
	info := &rootdRpc.OutboundInfo{
		Session:            s.sessionInfo,
		NeverProxySubnets:  neverProxy,
		HomeDir:            homedir.HomeDir(),
		Namespace:          s.Namespace,
		ManagerNamespace:   s.GetManagerNamespace(),
		SubnetViaWorkloads: s.subnetViaWorkloads,
		KubeFlags:          cr.KubeFlags,
		KubeconfigData:     cr.KubeconfigData,
	}

	if s.DNS != nil {
		info.Dns = &rootdRpc.DNSConfig{
			ExcludeSuffixes: s.DNS.ExcludeSuffixes,
			IncludeSuffixes: s.DNS.IncludeSuffixes,
			Excludes:        s.DNS.Excludes,
			Mappings:        s.DNS.Mappings.ToRPC(),
			LookupTimeout:   durationpb.New(s.DNS.LookupTimeout.Duration),
		}
		if len(s.DNS.LocalIP) > 0 {
			info.Dns.LocalIp = s.DNS.LocalIP.IP()
		}
		if len(s.DNS.RemoteIP) > 0 {
			info.Dns.RemoteIp = s.DNS.RemoteIP.IP()
		}
	}

	if len(s.AlsoProxy) > 0 {
		info.AlsoProxySubnets = make([]*manager.IPNet, len(s.AlsoProxy))
		for i, ap := range s.AlsoProxy {
			info.AlsoProxySubnets[i] = iputil.IPNetToRPC((*net.IPNet)(ap))
		}
	}
	if len(s.AllowConflictingSubnets) > 0 {
		info.AllowConflictingSubnets = make([]*manager.IPNet, len(s.AllowConflictingSubnets))
		for i, ap := range s.AllowConflictingSubnets {
			info.AllowConflictingSubnets[i] = iputil.IPNetToRPC((*net.IPNet)(ap))
		}
	}
	return info
}
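// connectRootDaemon establishes the connection to the root daemon in one of two ways:
// either by running the root session in-process, or by dialing the root daemon's socket
// and calling Connect, disconnecting and retrying once if the daemon is still attached
// to a stale session. In both cases it waits for the daemon's network (TUN device and
// DNS) to become ready before returning.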
func (s *session) connectRootDaemon(ctx context.Context, oi *rootdRpc.OutboundInfo, isPodDaemon bool) (rd rootdRpc.DaemonClient, err error) {
	// establish a connection to the root daemon gRPC service
	dlog.Info(ctx, "Connecting to root daemon...")
	svc := userd.GetService(ctx)
	if svc.RootSessionInProcess() {
		// Just run the root session in-process.
		rootSession, err := rootd.NewInProcSession(ctx, oi, s.managerClient, s.managerVersion, isPodDaemon)
		if err != nil {
			return nil, err
		}
		if err = rootSession.Start(ctx, dgroup.NewGroup(ctx, dgroup.GroupConfig{})); err != nil {
			return nil, err
		}
		rd = rootSession
	} else {
		var conn *grpc.ClientConn
		conn, err = socket.Dial(ctx, socket.RootDaemonPath(ctx),
			grpc.WithStatsHandler(otelgrpc.NewClientHandler()),
		)
		if err != nil {
			return nil, fmt.Errorf("unable to open root daemon socket: %w", err)
		}
		defer func() {
			if err != nil {
				conn.Close()
			}
		}()
		rd = rootdRpc.NewDaemonClient(conn)

		for attempt := 1; ; attempt++ {
			var rootStatus *rootdRpc.DaemonStatus
			if rootStatus, err = rd.Connect(ctx, oi); err != nil {
				return nil, fmt.Errorf("failed to connect to root daemon: %w", err)
			}
			oc := rootStatus.OutboundConfig
			if oc == nil || oc.Session == nil {
				// This is an internal error. Something is wrong with the root daemon.
				return nil, errors.New("root daemon's OutboundConfig has no Session")
			}
			if oc.Session.SessionId == oi.Session.SessionId {
				break
			}

			// Root daemon was running an old session. This indicates that this daemon somehow
			// crashed without disconnecting. So let's do that now, and then reconnect...
			if attempt == 2 {
				// ...or not, since we've already done it.
				return nil, errors.New("unable to reconnect to root daemon")
			}
			if _, err = rd.Disconnect(ctx, &empty.Empty{}); err != nil {
				return nil, fmt.Errorf("failed to disconnect from the root daemon: %w", err)
			}
		}
	}

	// The root daemon needs time to set up the TUN-device and DNS, which involves interacting
	// with the cluster-side traffic-manager. We know that the traffic-manager is up and
	// responding at this point, so it shouldn't take too long.
	ctx, cancel := client.GetConfig(ctx).Timeouts().TimeoutContext(ctx, client.TimeoutTrafficManagerAPI)
	defer cancel()
	if _, err = rd.WaitForNetwork(ctx, &empty.Empty{}); err != nil {
		if se, ok := status.FromError(err); ok {
			err = se.Err()
		}
		return nil, fmt.Errorf("failed to connect to root daemon: %v", err)
	}
	dlog.Debug(ctx, "Connected to root daemon")
	return rd, nil
}