github.com/telepresenceio/telepresence/v2@v2.20.0-pro.6.0.20240517030216-236ea954e789/pkg/client/userd/trafficmgr/intercept.go

package trafficmgr

import (
	"context"
	"errors"
	"fmt"
	"io"
	"net/http"
	"os"
	"strconv"
	"strings"
	"sync"
	"time"

	grpcCodes "google.golang.org/grpc/codes"
	grpcStatus "google.golang.org/grpc/status"
	"google.golang.org/protobuf/types/known/durationpb"

	"github.com/datawire/dlib/dgroup"
	"github.com/datawire/dlib/dlog"
	"github.com/telepresenceio/telepresence/rpc/v2/common"
	rpc "github.com/telepresenceio/telepresence/rpc/v2/connector"
	"github.com/telepresenceio/telepresence/rpc/v2/daemon"
	"github.com/telepresenceio/telepresence/rpc/v2/manager"
	"github.com/telepresenceio/telepresence/v2/pkg/agentconfig"
	"github.com/telepresenceio/telepresence/v2/pkg/client"
	"github.com/telepresenceio/telepresence/v2/pkg/client/docker"
	"github.com/telepresenceio/telepresence/v2/pkg/client/remotefs"
	"github.com/telepresenceio/telepresence/v2/pkg/client/userd"
	"github.com/telepresenceio/telepresence/v2/pkg/errcat"
	"github.com/telepresenceio/telepresence/v2/pkg/forwarder"
	"github.com/telepresenceio/telepresence/v2/pkg/iputil"
	"github.com/telepresenceio/telepresence/v2/pkg/maps"
	"github.com/telepresenceio/telepresence/v2/pkg/matcher"
	"github.com/telepresenceio/telepresence/v2/pkg/proc"
	"github.com/telepresenceio/telepresence/v2/pkg/restapi"
)

// intercept tracks the life-cycle of an intercept, dictated by the intercept's
// arrival and departure in the watchInterceptsLoop.
type intercept struct {
	sync.Mutex
	*manager.InterceptInfo

	// ctx is a context cancelled by the cancel attribute. It must be used by
	// services that should be cancelled when the intercept ends.
	ctx context.Context

	// cancel is called when the intercept is no longer present.
	cancel context.CancelFunc

	// wg is the group to wait for after a call to cancel.
	wg sync.WaitGroup

	// pid of the intercept handler for an intercept. This entry will only be present when
	// the telepresence intercept command spawns a new command. The int value reflects
	// the pid of that new command.
	pid int

	// containerName is the name or ID of the container that the intercept handler is
	// running in, when it runs in Docker. As with pid, this entry will only be present when
	// the telepresence intercept command spawns a new command using --docker-run or
	// --docker-build.
	containerName string

	// The mounter of the remote file system.
	remotefs.Mounter

	// Use a bridged ftp/sftp mount through this local port.
	localMountPort int32
}

// interceptResult is what gets written to the awaitIntercept's waitCh channel when the
// awaited intercept arrives.
type interceptResult struct {
	intercept  *intercept
	mountsDone <-chan struct{}
	err        error
}

// awaitIntercept is what the traffic-manager is using to notify the watchInterceptsLoop
// about an expected intercept arrival.
type awaitIntercept struct {
	// mountPoint is the mount point assigned to the InterceptInfo's ClientMountPoint when
	// it arrives from the traffic-manager.
	mountPoint string

	// mountPort is optional and indicates that a TCP bridge should be established, allowing
	// the mount to take place on the host.
	mountPort int32

	waitCh chan<- interceptResult
}
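
// Together, interceptResult and awaitIntercept form the handshake between
// AddIntercept and the watchInterceptsLoop: AddIntercept registers an
// awaitIntercept under the intercept's name, and handleInterceptSnapshot
// delivers an interceptResult on waitCh once the intercept leaves the
// WAITING state.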

// podInterceptKey identifies an intercepted pod. Although an intercept may span multiple
// pods, the user daemon will always choose exactly one pod with an active intercept to
// do port forwards and remote mounts.
type podInterceptKey struct {
	Id    string
	PodIP string
}

// The podIntercept provides pod-specific synchronization for cancellation of port forwards
// and mounts. Cancellation here does not mean that the intercept is cancelled. It just
// means that the given pod is no longer the chosen one. This typically happens when pods
// are scaled down and then up again.
type podIntercept struct {
	wg        sync.WaitGroup
	cancelPod context.CancelFunc
}

// podIntercepts is what the traffic-manager is using to keep track of the chosen pods for
// the currently active intercepts.
type podIntercepts struct {
	sync.Mutex

	// alivePods contains a map of the currently alive pod intercepts.
	alivePods map[podInterceptKey]*podIntercept

	// snapshot is recreated for each new intercept snapshot read from the manager.
	// The set controls which podIntercepts are considered alive when cancelUnwanted
	// is called.
	snapshot map[podInterceptKey]struct{}

	// mountsDone contains channels that are closed when the mounts are prepared for the
	// given id and podIP.
	mountsDone map[podInterceptKey]chan struct{}
}

func (ic *intercept) localPorts() []string {
	// Older versions use ii.extraPorts (TCP only), newer versions use ii.localPorts.
	ps := ic.Spec.LocalPorts
	if len(ps) == 0 {
		for _, ep := range ic.Spec.ExtraPorts {
			ps = append(ps, strconv.Itoa(int(ep)))
		}
		ic.Spec.LocalPorts = ps
	}
	return ps
}

func (ic *intercept) shouldForward() bool {
	return len(ic.localPorts()) > 0
}

// startForwards starts port forwards for the intercepted pod.
// It assumes that the caller has checked shouldForward and is sure that something will be started.
func (ic *intercept) startForwards(ctx context.Context, wg *sync.WaitGroup) {
	for _, port := range ic.localPorts() {
		var pfCtx context.Context
		if iputil.IsIpV6Addr(ic.PodIp) {
			pfCtx = dgroup.WithGoroutineName(ctx, fmt.Sprintf("/[%s]:%s", ic.PodIp, port))
		} else {
			pfCtx = dgroup.WithGoroutineName(ctx, fmt.Sprintf("/%s:%s", ic.PodIp, port))
		}
		wg.Add(1)
		go ic.workerPortForward(pfCtx, port, wg)
	}
}

func (ic *intercept) workerPortForward(ctx context.Context, port string, wg *sync.WaitGroup) {
	defer wg.Done()
	pp, err := agentconfig.NewPortAndProto(port)
	if err != nil {
		dlog.Errorf(ctx, "malformed extra port %q: %v", port, err)
		return
	}
	addr, err := pp.Addr()
	if err != nil {
		dlog.Errorf(ctx, "unable to resolve extra port %q: %v", port, err)
		return
	}
	f := forwarder.NewInterceptor(addr, ic.PodIp, pp.Port)
	err = f.Serve(ctx, nil)
	if err != nil && ctx.Err() == nil {
		dlog.Errorf(ctx, "port-forwarder failed with %v", err)
	}
}

func newPodIntercepts() *podIntercepts {
	return &podIntercepts{alivePods: make(map[podInterceptKey]*podIntercept)}
}
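
// The expected call sequence on a podIntercepts instance is initSnapshot,
// followed by one start call per active intercept in the snapshot, followed
// by cancelUnwanted to tear down pods that are no longer part of the snapshot.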

// start starts port forwards and mounts for the given intercept and remembers that it's alive.
func (lpf *podIntercepts) start(ctx context.Context, ic *intercept, rd daemon.DaemonClient) {
	// The mounts performed here are synced on by podIP + port to keep track of active
	// mounts. This is not enough in situations when a pod is deleted and another pod
	// takes over. That means two different IPs, so an additional synchronization on the
	// actual mount point is necessary to prevent it from being established and deleted
	// at the same time.
	fk := podInterceptKey{
		Id:    ic.Id,
		PodIP: ic.PodIp,
	}

	defer func() {
		if md, ok := lpf.mountsDone[fk]; ok {
			delete(lpf.mountsDone, fk)
			close(md)
		}
	}()

	if !ic.shouldForward() && !ic.shouldMount() {
		dlog.Debugf(ctx, "No mounts or port-forwards needed for %+v", fk)
		return
	}

	// Make this intercept part of the current snapshot tracking so that it isn't removed
	// once the snapshot has been completely handled.
	lpf.snapshot[fk] = struct{}{}

	// Already started?
	if _, isLive := lpf.alivePods[fk]; isLive {
		dlog.Debugf(ctx, "Mounts and port-forwards already active for %+v", fk)
		return
	}

	if client.GetConfig(ctx).Cluster().AgentPortForward {
		// An agent port-forward to the pod designated by the PodIP is necessary in order to
		// mount or port-forward to localhost.
		_, err := rd.WaitForAgentIP(ctx, &daemon.WaitForAgentIPRequest{
			Ip:      iputil.Parse(ic.PodIp),
			Timeout: durationpb.New(10 * time.Second),
		})
		switch grpcStatus.Code(err) {
		// Unavailable means that the feature is disabled. This is OK, the traffic-manager will do the forwarding.
		case grpcCodes.OK, grpcCodes.Unavailable:
		case grpcCodes.DeadlineExceeded:
			dlog.Errorf(ctx, "timeout waiting for port-forward to traffic-agent with pod-ip %s", ic.PodIp)
			return
		default:
			dlog.Errorf(ctx, "unexpected error for port-forward to traffic-agent with pod-ip %s: %v", ic.PodIp, err)
			return
		}
	}

	ctx, cancel := context.WithCancel(ic.ctx)
	lp := &podIntercept{cancelPod: cancel}
	if ic.shouldMount() {
		ic.startMount(ctx, &ic.wg, &lp.wg)
	}
	if ic.shouldForward() {
		ic.startForwards(ctx, &lp.wg)
	}
	dlog.Debugf(ctx, "Started mounts and port-forwards for %+v", fk)
	lpf.alivePods[fk] = lp
}

// initSnapshot prepares this instance for a new round of start calls followed by a cancelUnwanted.
func (lpf *podIntercepts) initSnapshot() {
	lpf.snapshot = make(map[podInterceptKey]struct{})
	lpf.mountsDone = make(map[podInterceptKey]chan struct{})
}

func (lpf *podIntercepts) getOrCreateMountsDone(ic *intercept) <-chan struct{} {
	fk := podInterceptKey{Id: ic.Id, PodIP: ic.PodIp}
	md, ok := lpf.mountsDone[fk]
	if !ok {
		md = make(chan struct{})
		lpf.mountsDone[fk] = md
	}
	return md
}

// cancelUnwanted cancels all port forwards that haven't been started since initSnapshot.
func (lpf *podIntercepts) cancelUnwanted(ctx context.Context) {
	for fk, lp := range lpf.alivePods {
		if _, isWanted := lpf.snapshot[fk]; !isWanted {
			dlog.Infof(ctx, "Terminating mounts and port-forwards for %+v", fk)
			lp.cancelPod()
			delete(lpf.alivePods, fk)
			md, ok := lpf.mountsDone[fk]
			if ok {
				delete(lpf.mountsDone, fk)
				close(md)
			}
			lp.wg.Wait()
		}
	}
}
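
// The watcher below consumes the intercept snapshots streamed by the
// traffic-manager and reconciles local state (mounts, port-forwards, and
// API servers) against each snapshot.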

func (s *session) watchInterceptsHandler(ctx context.Context) error {
	// Don't use a dgroup.Group because:
	//  1. we don't actually care about tracking errors (we just always retry) or any of
	//     dgroup's other functionality
	//  2. because goroutines may churn as intercepts are created and deleted, tracking all of
	//     their exit statuses is just a memory leak
	//  3. because we want a per-worker cancel, we'd have to implement our own Context
	//     management on top anyway, so dgroup wouldn't actually save us any complexity.
	return runWithRetry(ctx, s.watchInterceptsLoop)
}

func (s *session) watchInterceptsLoop(ctx context.Context) error {
	stream, err := s.managerClient.WatchIntercepts(ctx, s.SessionInfo())
	if err != nil {
		return fmt.Errorf("manager.WatchIntercepts dial: %w", err)
	}
	podIcepts := newPodIntercepts()
	for ctx.Err() == nil {
		snapshot, err := stream.Recv()
		if err != nil {
			// Handle as if we had an empty snapshot. This will ensure that port forwards and volume mounts are cancelled correctly.
			s.handleInterceptSnapshot(ctx, podIcepts, nil)
			if ctx.Err() != nil || errors.Is(err, io.EOF) {
				// Normal termination
				return nil
			}
			return fmt.Errorf("manager.WatchIntercepts recv: %w", err)
		}
		s.handleInterceptSnapshot(ctx, podIcepts, snapshot.Intercepts)
	}
	return nil
}

func (s *session) handleInterceptSnapshot(ctx context.Context, podIcepts *podIntercepts, intercepts []*manager.InterceptInfo) {
	s.setCurrentIntercepts(ctx, intercepts)
	podIcepts.initSnapshot()

	for _, ii := range intercepts {
		if ii.Disposition == manager.InterceptDispositionType_WAITING {
			continue
		}

		s.currentInterceptsLock.Lock()
		ic := s.currentIntercepts[ii.Id]
		aw := s.interceptWaiters[ii.Spec.Name]
		if aw != nil {
			delete(s.interceptWaiters, ii.Spec.Name)
		}
		s.currentInterceptsLock.Unlock()

		var err error
		if ii.Disposition == manager.InterceptDispositionType_ACTIVE {
			ns := ii.Spec.Namespace
			if s.Namespace != ns {
				err = errcat.User.Newf("active intercepts in both namespace %s and %s", ns, s.Namespace)
			}
		} else {
			err = fmt.Errorf("intercept in error state %v: %v", ii.Disposition, ii.Message)
		}

		// Notify waiters for active intercepts
		if aw != nil {
			dlog.Debugf(ctx, "wait status: intercept id=%q is no longer WAITING; is now %v", ii.Id, ii.Disposition)
			ir := interceptResult{
				intercept:  ic,
				err:        err,
				mountsDone: podIcepts.getOrCreateMountsDone(ic),
			}
			select {
			case aw.waitCh <- ir:
				if err != nil {
					// Error logged by receiver
					continue
				}
			default:
				// Channel was closed
				dlog.Debugf(ctx, "unable to propagate intercept id=%q", ii.Id)
			}
		}
		if err != nil {
			dlog.Error(ctx, err)
			continue
		}

		if s.isPodDaemon {
			// disable mount point logic
			ic.FtpPort = 0
			ic.SftpPort = 0
		}
		podIcepts.start(ctx, ic, s.rootDaemon)
	}
	podIcepts.cancelUnwanted(ctx)
}

// getCurrentIntercepts returns a copy of the current intercept snapshot. This snapshot does
// not include any local-only intercepts.
func (s *session) getCurrentIntercepts() []*intercept {
	// Copy the current snapshot
	s.currentInterceptsLock.Lock()
	intercepts := maps.ToSortedSlice(s.currentIntercepts)
	s.currentInterceptsLock.Unlock()
	return intercepts
}
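
// Note that currentInterceptsLock guards both currentIntercepts and
// interceptWaiters; the accessors in this file take it before touching
// either map.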

// getCurrentInterceptInfos returns the InterceptInfos of the current intercept snapshot.
func (s *session) getCurrentInterceptInfos() []*manager.InterceptInfo {
	// Copy the current snapshot
	ics := s.getCurrentIntercepts()
	ifs := make([]*manager.InterceptInfo, len(ics))
	for idx, ic := range ics {
		ifs[idx] = ic.InterceptInfo
	}
	return ifs
}

func (s *session) setCurrentIntercepts(ctx context.Context, iis []*manager.InterceptInfo) {
	s.currentInterceptsLock.Lock()
	defer s.currentInterceptsLock.Unlock()
	intercepts := make(map[string]*intercept, len(iis))
	sb := strings.Builder{}
	sb.WriteByte('[')
	for i, ii := range iis {
		ic, ok := s.currentIntercepts[ii.Id]
		if ok {
			// Retain ClientMountPoint. It's assigned in the client and never passed from the traffic-manager.
			ii.ClientMountPoint = ic.ClientMountPoint
			ic.InterceptInfo = ii
		} else {
			ic = &intercept{InterceptInfo: ii}
			ic.ctx, ic.cancel = context.WithCancel(ctx)
			dlog.Debugf(ctx, "Received new intercept %s", ic.Spec.Name)
			if aw, ok := s.interceptWaiters[ii.Spec.Name]; ok {
				ic.ClientMountPoint = aw.mountPoint
				ic.localMountPort = aw.mountPort
			}
		}
		intercepts[ii.Id] = ic
		if i > 0 {
			sb.WriteByte(',')
		}
		sb.WriteString(ii.Spec.Name)
		sb.WriteByte('=')
		sb.WriteString(ii.PodIp)
	}
	sb.WriteByte(']')
	dlog.Debugf(ctx, "setCurrentIntercepts(%s)", sb.String())

	// Cancel those that no longer exist
	for id, ic := range s.currentIntercepts {
		if _, ok := intercepts[id]; !ok {
			dlog.Debugf(ctx, "Cancelling context for intercept %s", ic.Spec.Name)
			ic.cancel()
		}
	}
	s.currentIntercepts = intercepts
	s.reconcileAPIServers(ctx)
}

func InterceptError(tp common.InterceptError, err error) *rpc.InterceptResult {
	return &rpc.InterceptResult{
		Error:         tp,
		ErrorText:     err.Error(),
		ErrorCategory: int32(errcat.GetCategory(err)),
	}
}

type interceptInfo struct {
	// Information provided by the traffic-manager as a response to the PrepareIntercept call.
	preparedIntercept *manager.PreparedIntercept
}

func (s *interceptInfo) InterceptResult() *rpc.InterceptResult {
	pi := s.preparedIntercept
	return &rpc.InterceptResult{
		ServiceUid:   pi.ServiceUid,
		WorkloadKind: pi.WorkloadKind,
	}
}

func (s *interceptInfo) PortIdentifier() (agentconfig.PortIdentifier, error) {
	var spi string
	if s.preparedIntercept.ServicePortName == "" {
		spi = strconv.Itoa(int(s.preparedIntercept.ServicePort))
	} else {
		spi = s.preparedIntercept.ServicePortName
	}
	return agentconfig.NewPortIdentifier(s.preparedIntercept.Protocol, spi)
}

func (s *interceptInfo) PreparedIntercept() *manager.PreparedIntercept {
	return s.preparedIntercept
}
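
// ensureNoInterceptConflict rejects a new intercept that would collide with
// an existing one on name, local target address, or mount point.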

func (s *session) ensureNoInterceptConflict(ir *rpc.CreateInterceptRequest) *rpc.InterceptResult {
	s.currentInterceptsLock.Lock()
	defer s.currentInterceptsLock.Unlock()
	spec := ir.Spec
	for _, iCept := range s.currentIntercepts {
		switch {
		case iCept.Spec.Name == spec.Name:
			return InterceptError(common.InterceptError_ALREADY_EXISTS, errcat.User.New(spec.Name))
		case iCept.Spec.TargetPort == spec.TargetPort && iCept.Spec.TargetHost == spec.TargetHost:
			return &rpc.InterceptResult{
				Error:         common.InterceptError_LOCAL_TARGET_IN_USE,
				ErrorText:     spec.Name,
				ErrorCategory: int32(errcat.User),
				InterceptInfo: iCept.InterceptInfo,
			}
		case ir.MountPoint != "" && iCept.ClientMountPoint == ir.MountPoint:
			return &rpc.InterceptResult{
				Error:         common.InterceptError_MOUNT_POINT_BUSY,
				ErrorText:     spec.Name,
				ErrorCategory: int32(errcat.User),
				InterceptInfo: iCept.InterceptInfo,
			}
		}
	}
	return nil
}

// CanIntercept checks if it is possible to create an intercept for the given request. The intercept can proceed
// only if the returned rpc.InterceptResult is nil. The returned userd.InterceptInfo is either nil, indicating a
// local intercept, or the prepared intercept information for the workload.
func (s *session) CanIntercept(c context.Context, ir *rpc.CreateInterceptRequest) (userd.InterceptInfo, *rpc.InterceptResult) {
	s.waitForSync(c)
	spec := ir.Spec
	if spec.Namespace == "" {
		spec.Namespace = s.Namespace
	} else if s.Namespace != spec.Namespace {
		return nil, InterceptError(common.InterceptError_NAMESPACE_AMBIGUITY, errcat.User.Newf("%s,%s", s.Namespace, spec.Namespace))
	}

	self := s.self
	if er := s.ensureNoInterceptConflict(ir); er != nil {
		return nil, er
	}
	if spec.Agent == "" {
		return nil, nil
	}

	mgrIr := &manager.CreateInterceptRequest{
		Session:       s.SessionInfo(),
		InterceptSpec: spec,
	}
	if er := self.InterceptProlog(c, mgrIr); er != nil {
		return nil, er
	}
	pi, err := s.managerClient.PrepareIntercept(c, mgrIr)
	if err != nil {
		return nil, InterceptError(common.InterceptError_TRAFFIC_MANAGER_ERROR, err)
	}
	if pi.Error != "" {
		return nil, InterceptError(common.InterceptError_TRAFFIC_MANAGER_ERROR, errcat.Category(pi.ErrorCategory).Newf(pi.Error))
	}

	iInfo := &interceptInfo{preparedIntercept: pi}
	return iInfo, nil
}

func (s *session) NewCreateInterceptRequest(spec *manager.InterceptSpec) *manager.CreateInterceptRequest {
	return &manager.CreateInterceptRequest{
		Session:       s.self.SessionInfo(),
		InterceptSpec: spec,
	}
}
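
// AddIntercept drives the full client-side flow: CanIntercept (which calls
// PrepareIntercept on the traffic-manager), registration of an awaitIntercept,
// the CreateIntercept call, and finally waiting for the intercept to become
// ACTIVE and for its mounts and port-forwards to be prepared.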

// AddIntercept adds one intercept.
func (s *session) AddIntercept(c context.Context, ir *rpc.CreateInterceptRequest) *rpc.InterceptResult {
	self := s.self
	iInfo, result := self.CanIntercept(c, ir)
	if result != nil {
		return result
	}

	spec := ir.Spec
	if iInfo == nil {
		return &rpc.InterceptResult{Error: common.InterceptError_UNSPECIFIED}
	}

	spec.Client = s.userAndHost
	if spec.Mechanism == "" {
		spec.Mechanism = "tcp"
	}

	mgrClient := self.ManagerClient()

	// iInfo.preparedIntercept == nil means that we're using an older traffic-manager, incapable
	// of using PrepareIntercept.
	pi := iInfo.PreparedIntercept()

	// Make the spec port identifier unambiguous.
	spec.ServiceName = pi.ServiceName
	spec.ServicePortName = pi.ServicePortName
	spec.ServicePort = pi.ServicePort
	spec.Protocol = pi.Protocol
	pti, err := iInfo.PortIdentifier()
	if err != nil {
		return InterceptError(common.InterceptError_MISCONFIGURED_WORKLOAD, err)
	}
	spec.ServicePortIdentifier = pti.String()
	result = iInfo.InterceptResult()

	spec.ServiceUid = result.ServiceUid
	spec.WorkloadKind = result.WorkloadKind

	dlog.Debugf(c, "creating intercept %s", spec.Name)
	tos := client.GetConfig(c).Timeouts()
	spec.RoundtripLatency = int64(tos.Get(client.TimeoutRoundtripLatency)) * 2 // Account for extra hop
	spec.DialTimeout = int64(tos.Get(client.TimeoutEndpointDial))
	c, cancel := tos.TimeoutContext(c, client.TimeoutIntercept)
	defer cancel()

	// The agent is in place and the traffic-manager has acknowledged the creation of the intercept. It
	// should become active within a few seconds.
	waitCh := make(chan interceptResult, 2) // Need a buffer because the reply can arrive before we're reading from the channel.
	s.currentInterceptsLock.Lock()
	s.interceptWaiters[spec.Name] = &awaitIntercept{
		mountPoint: ir.MountPoint,
		mountPort:  ir.LocalMountPort,
		waitCh:     waitCh,
	}
	s.currentInterceptsLock.Unlock()
	defer func() {
		s.currentInterceptsLock.Lock()
		if _, ok := s.interceptWaiters[spec.Name]; ok {
			delete(s.interceptWaiters, spec.Name)
			close(waitCh)
		}
		s.currentInterceptsLock.Unlock()
	}()

	ii, err := mgrClient.CreateIntercept(c, self.NewCreateInterceptRequest(spec))
	if err != nil {
		dlog.Debugf(c, "manager responded to CreateIntercept with error %v", err)
		return InterceptError(common.InterceptError_TRAFFIC_MANAGER_ERROR, err)
	}

	dlog.Debugf(c, "created intercept %s", ii.Spec.Name)

	success := false
	defer func() {
		if !success {
			dlog.Debugf(c, "intercept %s failed to create, will remove...", ii.Spec.Name)

			// Make an attempt to remove the created intercept using a time-limited Context. Our
			// context is already done.
			rc, cancel := context.WithTimeout(context.WithoutCancel(c), 5*time.Second)
			defer cancel()
			if removeErr := self.RemoveIntercept(rc, ii.Spec.Name); removeErr != nil {
				dlog.Warnf(c, "failed to remove failed intercept %s: %v", ii.Spec.Name, removeErr)
			}
		}
	}()
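
	// From this point on, any failure to reach ACTIVE (or a timeout) triggers the
	// deferred removal above, so the traffic-manager is not left with a
	// half-created intercept.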

	// Wait for the intercept to transition from WAITING or NO_AGENT to ACTIVE. This
	// might result in more than one event.
	for {
		select {
		case <-c.Done():
			return InterceptError(common.InterceptError_FAILED_TO_ESTABLISH, client.CheckTimeout(c, c.Err()))
		case wr := <-waitCh:
			if wr.err != nil {
				return InterceptError(common.InterceptError_FAILED_TO_ESTABLISH, wr.err)
			}
			ic := wr.intercept
			ii = ic.InterceptInfo
			if ii.Disposition != manager.InterceptDispositionType_ACTIVE {
				continue
			}
			result.InterceptInfo = ii
			select {
			case <-c.Done():
				return InterceptError(common.InterceptError_FAILED_TO_ESTABLISH, client.CheckTimeout(c, c.Err()))
			case <-wr.mountsDone:
			}
			if er := self.InterceptEpilog(c, ir, result); er != nil {
				return er
			}
			success = true // Prevent removal in deferred function
			return result
		}
	}
}

func (s *session) InterceptProlog(context.Context, *manager.CreateInterceptRequest) *rpc.InterceptResult {
	return nil
}

func (s *session) InterceptEpilog(context.Context, *rpc.CreateInterceptRequest, *rpc.InterceptResult) *rpc.InterceptResult {
	return nil
}

// RemoveIntercept removes one intercept by name.
func (s *session) RemoveIntercept(c context.Context, name string) error {
	dlog.Debugf(c, "Removing intercept %s", name)
	ii := s.getInterceptByName(name)
	if ii == nil {
		dlog.Debugf(c, "Intercept %s was already removed", name)
		return nil
	}
	return s.removeIntercept(c, ii)
}

func (s *session) removeIntercept(c context.Context, ic *intercept) error {
	name := ic.Spec.Name

	// There's no use trying to kill processes when using a container-based daemon, unless
	// that container-based daemon runs as a normal user daemon with a separate root daemon.
	// Some users run a standard telepresence client together with intercepts in one
	// single container.
	if !(proc.RunningInContainer() && userd.GetService(c).RootSessionInProcess()) {
		if ic.containerName != "" {
			if err := docker.StopContainer(docker.EnableClient(c), ic.containerName); err != nil {
				dlog.Error(c, err)
			}
		} else if ic.pid != 0 {
			p, err := os.FindProcess(ic.pid)
			if err != nil {
				dlog.Errorf(c, "unable to find interceptor for intercept %s with pid %d", name, ic.pid)
			} else {
				dlog.Debugf(c, "terminating interceptor for intercept %s with pid %d", name, ic.pid)
				_ = proc.Terminate(p)
			}
		}
	}

	// Unmount filesystems before telling the manager to remove the intercept
	ic.cancel()
	ic.wg.Wait()

	dlog.Debugf(c, "telling manager to remove intercept %s", name)
	c, cancel := client.GetConfig(c).Timeouts().TimeoutContext(c, client.TimeoutTrafficManagerAPI)
	defer cancel()
	_, err := s.managerClient.RemoveIntercept(c, &manager.RemoveInterceptRequest2{
		Session: s.SessionInfo(),
		Name:    name,
	})
	return err
}
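
// AddInterceptor and RemoveInterceptor associate an intercept with the local
// process or container that handles it, so that removeIntercept can terminate
// the handler when the intercept goes away.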

// AddInterceptor associates the given intercept with a running process. This ensures that
// the running process will be signalled when the intercept is removed.
func (s *session) AddInterceptor(id string, ih *rpc.Interceptor) error {
	s.currentInterceptsLock.Lock()
	if ci, ok := s.currentIntercepts[id]; ok {
		ci.pid = int(ih.Pid)
		ci.containerName = ih.ContainerName
	}
	s.currentInterceptsLock.Unlock()
	return nil
}

func (s *session) RemoveInterceptor(id string) error {
	s.currentInterceptsLock.Lock()
	if ci, ok := s.currentIntercepts[id]; ok {
		ci.pid = 0
		ci.containerName = ""
	}
	s.currentInterceptsLock.Unlock()
	return nil
}

// GetInterceptSpec returns the InterceptSpec for the given name, or nil if no such spec exists.
func (s *session) GetInterceptSpec(name string) *manager.InterceptSpec {
	if ic := s.getInterceptByName(name); ic != nil {
		return ic.Spec
	}
	return nil
}

// GetInterceptInfo returns the InterceptInfo for the given name, or nil if no such info exists.
func (s *session) GetInterceptInfo(name string) *manager.InterceptInfo {
	if ic := s.getInterceptByName(name); ic != nil {
		ii := ic.InterceptInfo
		if ic.containerName != "" {
			if ii.Environment == nil {
				ii.Environment = make(map[string]string, 1)
			}
			ii.Environment["TELEPRESENCE_HANDLER_CONTAINER_NAME"] = ic.containerName
		}
		return ii
	}
	return nil
}

// getInterceptByName returns the intercept with the given name, or nil if no such intercept exists.
func (s *session) getInterceptByName(name string) (found *intercept) {
	s.currentInterceptsLock.Lock()
	for _, ic := range s.currentIntercepts {
		if ic.Spec.Name == name {
			found = ic
			break
		}
	}
	s.currentInterceptsLock.Unlock()
	return found
}

// InterceptsForWorkload returns the client's current intercepts on the given namespace and workload combination.
func (s *session) InterceptsForWorkload(workloadName, namespace string) []*manager.InterceptSpec {
	wlis := make([]*manager.InterceptSpec, 0)
	for _, ic := range s.getCurrentIntercepts() {
		if ic.Spec.Agent == workloadName && ic.Spec.Namespace == namespace {
			wlis = append(wlis, ic.Spec)
		}
	}
	return wlis
}

// ClearIntercepts removes all intercepts.
func (s *session) ClearIntercepts(c context.Context) error {
	for _, ic := range s.getCurrentIntercepts() {
		dlog.Debugf(c, "Clearing intercept %s", ic.Spec.Name)
		err := s.removeIntercept(c, ic)
		if err != nil && grpcStatus.Code(err) != grpcCodes.NotFound {
			return err
		}
	}
	return nil
}
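
// The functions below manage the local REST API servers (one per distinct
// TELEPRESENCE_API_PORT found among active intercepts) and the header matchers
// that InterceptInfo uses to decide whether a given request is intercepted.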

// reconcileAPIServers starts and stops API servers as needed, based on the TELEPRESENCE_API_PORT environment
// variable in the currently intercepted agents' env.
func (s *session) reconcileAPIServers(ctx context.Context) {
	wantedPorts := make(map[int]struct{})
	wantedMatchers := make(map[string]*manager.InterceptInfo)

	agentAPIPort := func(ii *manager.InterceptInfo) int {
		is := ii.Spec
		if ps, ok := ii.Environment[agentconfig.EnvAPIPort]; ok {
			port, err := strconv.ParseUint(ps, 10, 16)
			if err == nil {
				return int(port)
			}
			dlog.Errorf(ctx, "unable to parse TELEPRESENCE_API_PORT(%q) to a port number in agent %s.%s: %v", ps, is.Agent, is.Namespace, err)
		}
		return 0
	}

	for _, ic := range s.currentIntercepts {
		ii := ic.InterceptInfo
		if ic.Disposition == manager.InterceptDispositionType_ACTIVE {
			if port := agentAPIPort(ii); port > 0 {
				wantedPorts[port] = struct{}{}
				wantedMatchers[ic.Id] = ii
			}
		}
	}
	for p, as := range s.currentAPIServers {
		if _, ok := wantedPorts[p]; !ok {
			as.cancel()
			delete(s.currentAPIServers, p)
		}
	}
	for p := range wantedPorts {
		if _, ok := s.currentAPIServers[p]; !ok {
			s.newAPIServerForPort(ctx, p)
		}
	}
	for id := range s.currentMatchers {
		if _, ok := wantedMatchers[id]; !ok {
			delete(s.currentMatchers, id)
		}
	}
	for id, ic := range wantedMatchers {
		if _, ok := s.currentMatchers[id]; !ok {
			s.newMatcher(ctx, ic)
		}
	}
}

func (s *session) newAPIServerForPort(ctx context.Context, port int) {
	svr := restapi.NewServer(s)
	as := apiServer{Server: svr}
	ctx, as.cancel = context.WithCancel(ctx)
	if s.currentAPIServers == nil {
		s.currentAPIServers = map[int]*apiServer{port: &as}
	} else {
		s.currentAPIServers[port] = &as
	}
	go func() {
		if err := svr.ListenAndServe(ctx, port); err != nil {
			dlog.Error(ctx, err)
		}
	}()
}

func (s *session) newMatcher(ctx context.Context, ic *manager.InterceptInfo) {
	m, err := matcher.NewRequestFromMap(ic.Headers)
	if err != nil {
		dlog.Error(ctx, err)
		return
	}
	if s.currentMatchers == nil {
		s.currentMatchers = make(map[string]*apiMatcher)
	}
	s.currentMatchers[ic.Id] = &apiMatcher{
		requestMatcher: m,
		metadata:       ic.Metadata,
	}
}

func (s *session) InterceptInfo(ctx context.Context, callerID, path string, _ uint16, headers http.Header) (*restapi.InterceptInfo, error) {
	s.currentInterceptsLock.Lock()
	defer s.currentInterceptsLock.Unlock()

	r := &restapi.InterceptInfo{ClientSide: true}
	am := s.currentMatchers[callerID]
	switch {
	case am == nil:
		dlog.Debugf(ctx, "no matcher found for callerID %s", callerID)
	case am.requestMatcher.Matches(path, headers):
		dlog.Debugf(ctx, "%s: matcher %s\nmatches path %q and headers\n%s", callerID, am.requestMatcher, path, matcher.HeaderStringer(headers))
		r.Intercepted = true
		r.Metadata = am.metadata
	default:
		dlog.Debugf(ctx, "%s: matcher %s\ndoes not match path %q and headers\n%s", callerID, am.requestMatcher, path, matcher.HeaderStringer(headers))
	}
	return r, nil
}