// github.phpd.cn/cilium/cilium@v1.6.12/pkg/workloads/docker.go

// Copyright 2017-2019 Authors of Cilium
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package workloads

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"io/ioutil"
	"net"
	"net/http"
	"strconv"
	"strings"
	"time"

	"github.com/cilium/cilium/api/v1/models"
	"github.com/cilium/cilium/common/addressing"
	"github.com/cilium/cilium/pkg/endpoint"
	"github.com/cilium/cilium/pkg/endpoint/connector"
	endpointid "github.com/cilium/cilium/pkg/endpoint/id"
	"github.com/cilium/cilium/pkg/endpointmanager"
	"github.com/cilium/cilium/pkg/k8s/utils"
	"github.com/cilium/cilium/pkg/logging/logfields"
	"github.com/cilium/cilium/pkg/metrics"
	"github.com/cilium/cilium/pkg/node"
	"github.com/cilium/cilium/pkg/option"
	"github.com/cilium/cilium/plugins/cilium-docker/driver"

	"github.com/containernetworking/plugins/pkg/ns"
	dTypes "github.com/docker/docker/api/types"
	dTypesEvents "github.com/docker/docker/api/types/events"
	dNetwork "github.com/docker/docker/api/types/network"
	"github.com/docker/docker/client"
	"github.com/sirupsen/logrus"
	"github.com/vishvananda/netlink"
	ctx "golang.org/x/net/context"
	"golang.org/x/sys/unix"
)

const (
	// Docker is the identifier of the docker workload runtime.
	Docker WorkloadRuntimeType = "docker"
)

var (
	dockerInstance = &dockerModule{
		opts: workloadRuntimeOpts{
			EpOpt: &workloadRuntimeOpt{
				description: "Address of docker endpoint",
				value:       "unix:///var/run/docker.sock",
			},
			DatapathModeOpt: &workloadRuntimeOpt{
				description: "Cilium datapath mode",
				value:       option.DatapathModeVeth,
			},
		},
	}
)

type dockerModule struct {
	opts workloadRuntimeOpts
}

func init() {
	registerWorkload(Docker, dockerInstance)
}

func (c *dockerModule) getName() string {
	return string(Docker)
}

func (c *dockerModule) setConfigDummy() {
}

func (c *dockerModule) setConfig(opts map[string]string) error {
	return setOpts(opts, c.opts)
}

func (c *dockerModule) getConfig() map[string]string {
	return getOpts(c.opts)
}

func (c *dockerModule) newClient() (WorkloadRuntime, error) {
	return newDockerClient(c.opts)
}

type dockerClient struct {
	*client.Client
	datapathMode string
}

func newDockerClient(opts workloadRuntimeOpts) (WorkloadRuntime, error) {
	defaultHeaders := map[string]string{"User-Agent": "cilium"}
	ep := opts[EpOpt]
	c, err := client.NewClient(ep.value, "v1.21", nil, defaultHeaders)
	if err != nil {
		return nil, err
	}
	dpMode, found := opts[DatapathModeOpt]
	if !found {
		return nil, fmt.Errorf("'%s' option not found", DatapathModeOpt)
	}

	return &dockerClient{Client: c, datapathMode: dpMode.value}, nil
}
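// Illustrative sketch (not part of the original file): the option plumbing
// above is normally driven by the daemon at startup. Assuming setOpts
// accepts the option names (EpOpt, DatapathModeOpt) as map keys, a manual
// setup might look like this; exampleDockerSetup is a hypothetical name.
func exampleDockerSetup() (WorkloadRuntime, error) {
	// Point the module at the docker socket and select the veth datapath,
	// then build a workload runtime client from the resulting options.
	if err := dockerInstance.setConfig(map[string]string{
		EpOpt:           "unix:///var/run/docker.sock",
		DatapathModeOpt: option.DatapathModeVeth,
	}); err != nil {
		return nil, err
	}
	return dockerInstance.newClient()
}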
func newDockerClientMock(opts workloadRuntimeOpts) (WorkloadRuntime, error) {
	mwc := newMockClient(networksMock())
	c, err := client.NewClient("http://127.0.0.1:2375", "v1.21", mwc, nil)
	if err != nil {
		return nil, err
	}

	return &dockerClient{Client: c}, nil
}

// transportFunc is a helper to mock docker calls: it lets an ordinary
// function act as an http.RoundTripper.
type transportFunc func(*http.Request) (*http.Response, error)

func (tf transportFunc) RoundTrip(req *http.Request) (*http.Response, error) {
	return tf(req)
}

// newMockClient returns an *http.Client that answers every "http" request
// via the given doer instead of the network.
func newMockClient(doer func(*http.Request) (*http.Response, error)) *http.Client {
	v := &http.Transport{
		Proxy: http.ProxyFromEnvironment,
		DialContext: (&net.Dialer{
			Timeout:   30 * time.Second,
			KeepAlive: 30 * time.Second,
			DualStack: true,
		}).DialContext,
		MaxIdleConns:          100,
		IdleConnTimeout:       90 * time.Second,
		TLSHandshakeTimeout:   10 * time.Second,
		ExpectContinueTimeout: 1 * time.Second,
	}
	v.RegisterProtocol("http", transportFunc(doer))
	return &http.Client{
		Transport: http.RoundTripper(v),
	}
}

// networksMock mocks docker calls to the networks endpoint.
func networksMock() func(req *http.Request) (*http.Response, error) {
	return func(req *http.Request) (*http.Response, error) {
		if !strings.HasPrefix(req.URL.Path, "/v1.21/networks") {
			return nil, fmt.Errorf("Only expecting /v1.21/networks requests, got %s", req.URL.Path)
		}

		header := http.Header{}
		header.Set("Content-Type", "application/json")

		body, err := json.Marshal(&dTypes.NetworkResource{
			Name:       "12345",
			ID:         "1234",
			Scope:      "global",
			Driver:     "cilium-net",
			EnableIPv6: true,
			IPAM:       dNetwork.IPAM{},
			Internal:   false,
			// this map contains all endpoints except 259
			Containers: map[string]dTypes.EndpointResource{
				"603e047d2268a57f5a5f93f7f9e1263e9207e348a06654bf64948def00100256": {
					EndpointID: "93529fda8c401a071d21d6bd46fdf5499b9014dcb5a35f2e3efaa8d800200256",
				},
				"603e047d2268a57f5a5f93f7f9e1263e9207e348a06654bf64948def00100257": {
					EndpointID: "93529fda8c401a071d21d6bd46fdf5499b9014dcb5a35f2e3efaa8d800200257",
				},
				"603e047d2268a57f5a5f93f7f9e1263e9207e348a06654bf64948def00100258": {
					EndpointID: "93529fda8c401a071d21d6bd46fdf5499b9014dcb5a35f2e3efaa8d800100258",
				},
			},
			Options: map[string]string{},
			Labels:  map[string]string{},
		})
		if err != nil {
			return nil, err
		}

		return &http.Response{
			StatusCode: 200,
			Body:       ioutil.NopCloser(bytes.NewReader(body)),
			Header:     header,
		}, nil
	}
}
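// Illustrative sketch (not part of the original file): the mock above lets
// the docker client be exercised without a daemon. Any request under
// /v1.21/networks is answered with the canned NetworkResource, so a test
// could inspect an arbitrary network ID; exampleInspectMockedNetwork is a
// hypothetical helper.
func exampleInspectMockedNetwork() error {
	rt, err := newDockerClientMock(nil)
	if err != nil {
		return err
	}
	d := rt.(*dockerClient)
	// Served by networksMock via the registered "http" protocol, never by
	// a real network round trip.
	nw, err := d.NetworkInspect(ctx.Background(), "1234", dTypes.NetworkInspectOptions{})
	if err != nil {
		return err
	}
	fmt.Printf("network %s has %d attached containers\n", nw.Name, len(nw.Containers))
	return nil
}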
// IsRunning returns false if the provided endpoint cannot be associated with a
// running workload. The runtime must be reachable to make this decision.
func (d *dockerClient) IsRunning(ep *endpoint.Endpoint) bool {
	if d == nil {
		return false
	}

	runtimeRunning := false

	networkID := ep.GetDockerNetworkID()
	containerID := ep.GetContainerID()

	if networkID != "" {
		nls, err := d.NetworkInspect(ctx.Background(), networkID, dTypes.NetworkInspectOptions{})
		if client.IsErrNotFound(err) {
			return false
		}

		if err == nil {
			runtimeRunning = true
			found := false
			for _, v := range nls.Containers {
				if v.EndpointID == ep.DockerEndpointID {
					found = true
					break
				}
			}

			if found {
				return found
			}
		}
	}

	if containerID != "" {
		cont, err := d.ContainerInspect(ctx.Background(), containerID)
		if client.IsErrNotFound(err) {
			return false
		}

		if err == nil {
			// Container may exist but is not in running state
			return cont.State.Running
		}
	}

	return !runtimeRunning
}

// Status returns the status of the workload runtime.
func (d *dockerClient) Status() *models.Status {
	if d == nil {
		return workloadStatusDisabled
	}

	if _, err := d.Info(ctx.Background()); err != nil {
		return &models.Status{State: models.StatusStateFailure, Msg: "docker daemon: " + err.Error()}
	}

	return &models.Status{State: models.StatusStateOk, Msg: "docker daemon: OK"}
}

// EnableEventListener watches for docker events and performs the plumbing
// for containers that are started or die.
func (d *dockerClient) EnableEventListener() (chan<- *EventMessage, error) {
	if d == nil {
		log.Debug("Not enabling docker event listener because dockerClient is nil")
		return nil, nil
	}
	log.Info("Enabling docker event listener")

	ws := newWatcherState()

	since := time.Now()
	eo := dTypes.EventsOptions{Since: strconv.FormatInt(since.Unix(), 10)}
	r, err := d.Events(ctx.Background(), eo)

	go d.listenForDockerEvents(ws, r, err)

	log.Debug("Started to listen for docker events")
	return nil, nil
}

func (d *dockerClient) listenForDockerEvents(ws *watcherState, messagesCh <-chan dTypesEvents.Message, errCh <-chan error) {
	for {
		select {
		case err, ok := <-errCh:
			if !ok || err == io.EOF {
				log.Info("Docker error channel closed")
				return
			}
			log.WithError(err).Error("Error while reading docker events")
			// Sleep to avoid consuming 100% CPU
			time.Sleep(100 * time.Millisecond)
		case e, ok := <-messagesCh:
			metrics.EventTSContainerd.SetToCurrentTime()
			if !ok {
				log.Error("docker events channel closed")
				return
			}
			if e.ID == "" || e.Type != "container" {
				continue
			}
			log.WithFields(logrus.Fields{
				"event":               e.Status,
				logfields.ContainerID: shortContainerID(e.ID),
			}).Debug("Queueing container event")

			switch e.Status {
			case "start":
				ws.enqueueByContainerID(e.ID, &EventMessage{WorkloadID: e.ID, EventType: EventTypeStart})
			case "die":
				ws.enqueueByContainerID(e.ID, &EventMessage{WorkloadID: e.ID, EventType: EventTypeDelete})
			}
		}
	}
}

func (d *dockerClient) processEvents(events chan EventMessage) {
	for m := range events {
		if m.WorkloadID != "" {
			log.WithFields(logrus.Fields{
				"event":               m.EventType,
				logfields.ContainerID: shortContainerID(m.WorkloadID),
			}).Debug("Processing event for Container")
			d.processEvent(m)
		}
	}
}
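// Illustrative sketch (not part of the original file): processEvents is a
// plain channel consumer, so the producer side can be anything that feeds
// EventMessage values. exampleRunEventPipeline is hypothetical and only
// shows the wiring; real events come from listenForDockerEvents via the
// watcherState queues.
func exampleRunEventPipeline(d *dockerClient) chan EventMessage {
	events := make(chan EventMessage, 16)
	go d.processEvents(events)
	// A synthetic start event for a hypothetical container ID.
	events <- EventMessage{WorkloadID: "0123456789ab", EventType: EventTypeStart}
	return events
}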
func (d *dockerClient) processEvent(m EventMessage) {
	switch m.EventType {
	case EventTypeStart:
		// A real event overwrites any memory of ignored containers
		stopIgnoringContainer(m.WorkloadID)
		d.handleCreateWorkload(m.WorkloadID, true)
	case EventTypeDelete:
		Owner().DeleteEndpoint(endpointid.NewID(endpointid.ContainerIdPrefix, m.WorkloadID))
	}
}

func (d *dockerClient) getEndpointByIP(cont *dTypes.ContainerJSON) *endpoint.Endpoint {
	scopedLog := log.WithField(logfields.ContainerID, shortContainerID(cont.ID))

	if cont.NetworkSettings == nil {
		scopedLog.Debug("No network settings included in event")
		return nil
	}

	for _, contNetwork := range cont.NetworkSettings.Networks {
		if contNetwork == nil {
			continue
		}

		if contNetwork.GlobalIPv6Address != "" {
			id := endpointid.NewID(endpointid.IPv6Prefix, contNetwork.GlobalIPv6Address)
			if ep, err := endpointmanager.Lookup(id); err != nil {
				log.WithError(err).WithField(logfields.V6Prefix, id).Warning("Unable to lookup endpoint by IP prefix")
			} else if ep != nil {
				return ep
			}
		}

		if contNetwork.IPAddress != "" {
			id := endpointid.NewID(endpointid.IPv4Prefix, contNetwork.IPAddress)
			if ep, err := endpointmanager.Lookup(id); err != nil {
				log.WithError(err).WithField(logfields.V4Prefix, id).Warning("Unable to lookup endpoint by IP prefix")
			} else if ep != nil {
				return ep
			}
		}
	}

	scopedLog.Debug("IP address assigned by Cilium could not be derived from pod")
	return nil
}

func (d *dockerClient) getCiliumIPv6(networks map[string]*dNetwork.EndpointSettings) *addressing.CiliumIPv6 {
	for _, contNetwork := range networks {
		if contNetwork == nil {
			continue
		}

		scopedLog := log.WithField(logfields.EndpointID, contNetwork.EndpointID)

		ipv6gw := net.ParseIP(contNetwork.IPv6Gateway)
		if !ipv6gw.Equal(node.GetIPv6Router()) {
			scopedLog.WithField(logfields.Object, contNetwork).Debug("Skipping network because of gateway mismatch")
			continue
		}
		ip, err := addressing.NewCiliumIPv6(contNetwork.GlobalIPv6Address)
		if err == nil {
			return &ip
		}
	}
	return nil
}
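// Illustrative sketch (not part of the original file): getCiliumIPv6 only
// accepts networks whose IPv6 gateway equals this node's router IP. The
// EndpointSettings below show a shape that would match; the address and the
// exampleCiliumNetworkSettings name are hypothetical.
func exampleCiliumNetworkSettings() map[string]*dNetwork.EndpointSettings {
	return map[string]*dNetwork.EndpointSettings{
		"cilium-net": {
			IPv6Gateway:       node.GetIPv6Router().String(),
			GlobalIPv6Address: "f00d::a00:20f:0:1", // hypothetical pod IPv6
		},
	}
}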
func (d *dockerClient) handleCreateWorkload(id string, retry bool) {
	var sandboxKey string // path to the container network namespace

	scopedLog := log.WithFields(logrus.Fields{
		logfields.ContainerID: shortContainerID(id),
		fieldMaxRetry:         EndpointCorrelationMaxRetries,
		"willRetry":           retry,
	})

	for try := 1; try <= EndpointCorrelationMaxRetries; try++ {
		retryLog := scopedLog.WithField("retry", try)

		if try > 1 {
			if retry {
				retryLog.Debug("Waiting for endpoint representing container to appear")
				time.Sleep(EndpointCorrelationSleepTime(try))
			} else {
				break
			}
		}

		dockerContainer, err := d.ContainerInspect(ctx.Background(), id)
		if err != nil {
			retryLog.WithError(err).Debug("Unable to inspect container after container create event")
			continue
		}

		containerName := dockerContainer.Name
		if containerName == "" {
			retryLog.Warn("Container name not set in event from docker")
		}

		ep := endpointmanager.LookupContainerID(id)
		if ep == nil {
			// Container ID is not yet known; try and find endpoint via
			// the IP address assigned.
			ep = d.getEndpointByIP(&dockerContainer)
		}

		if ep == nil {
			// Endpoint does not exist yet. This indicates that the
			// orchestration system has not requested us to handle
			// networking for this container yet (or never will).
			// We will retry a couple of times to wait for this to
			// happen.
			retryLog.WithFields(logrus.Fields{
				"containerName": containerName,
			}).Debug("Container event could not be associated with endpoint yet")
			continue
		}

		retryLog.WithFields(logrus.Fields{
			"containerName":      containerName,
			logfields.EndpointID: ep.ID,
		}).Debug("Associated container event with endpoint")

		if dockerContainer.NetworkSettings != nil {
			sandboxKey = dockerContainer.NetworkSettings.SandboxKey
			id := dockerContainer.NetworkSettings.EndpointID
			if id != "" {
				ep.SetDockerEndpointID(id)
			}
		}

		// Docker appends '/' to container names.
		ep.SetContainerName(strings.Trim(containerName, "/"))

		// Finish ipvlan initialization if the endpoint is connected via
		// Docker libnetwork (cilium-docker).
		if ep.GetDockerNetworkID() != "" && d.datapathMode == option.DatapathModeIpvlan {
			if err := finishIpvlanInit(ep, sandboxKey); err != nil {
				retryLog.WithError(err).Warn("Cannot finish ipvlan initialization")
				continue
			}
		}

		allLabels := map[string]string{}
		if dockerContainer.Config != nil {
			allLabels = dockerContainer.Config.Labels
		}

		processCreateWorkload(ep, id, allLabels)

		return
	}

	startIgnoringContainer(id)

	scopedLog.Info("No request received to manage networking for container")
}

// IgnoreRunningWorkloads lists the already running containers, adds them to
// the list of ignored containers, and allocates the IPs they are using to
// prevent future collisions.
func (d *dockerClient) IgnoreRunningWorkloads() {
	if d == nil {
		return
	}

	conts, err := d.ContainerList(ctx.Background(), dTypes.ContainerListOptions{})
	if err != nil {
		return
	}
	for _, cont := range conts {
		scopedLog := log.WithField(logfields.ContainerID, cont.ID)
		scopedLog.Info("Adding running container to the list of ignored containers")
		startIgnoringContainer(cont.ID)
		if cont.NetworkSettings == nil {
			continue
		}
		cIP := d.getCiliumIPv6(cont.NetworkSettings.Networks)
		if cIP == nil {
			continue
		}
		allocator.BlacklistIP(cIP.IP(), "ignored docker container: "+cont.ID)
		// TODO Release this address when the ignored container leaves
		scopedLog.WithFields(logrus.Fields{
			logfields.IPAddr: cIP.IP(),
		}).Info("Found container running with potential collision IP address, adding to the list of allocated IPs")
	}
}

func (d *dockerClient) workloadIDsList(ctx context.Context) ([]string, error) {
	cList, err := d.ContainerList(ctx, dTypes.ContainerListOptions{All: false})
	if err != nil {
		return nil, err
	}

	cont := make([]string, 0, len(cList))
	for _, c := range cList {
		cont = append(cont, c.ID)
	}
	return cont, nil
}
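// Illustrative sketch (not part of the original file): workloadIDsList only
// returns running containers (All: false). A caller would typically bound
// the call with a timeout, mirroring GetAllInfraContainersPID below;
// exampleListRunningWorkloads is a hypothetical name.
func exampleListRunningWorkloads(d *dockerClient) ([]string, error) {
	listCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()
	return d.workloadIDsList(listCtx)
}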
// GetAllInfraContainersPID returns a map from container ID to the PID of
// that container.
func (d *dockerClient) GetAllInfraContainersPID() (map[string]int, error) {
	timeoutCtx, cancel := ctx.WithTimeout(ctx.Background(), 10*time.Second)
	defer cancel()

	cList, err := Client().workloadIDsList(timeoutCtx)
	if err != nil {
		log.WithError(err).Error("Failed to retrieve the container list")
		return nil, err
	}
	pids := map[string]int{}
	for _, contID := range cList {
		cJSON, err := d.ContainerInspect(context.Background(), contID)
		if err != nil {
			continue
		}
		if cJSON.Config == nil || !utils.IsInfraContainer(cJSON.Config.Labels) {
			continue
		}
		if cJSON.State == nil || !cJSON.State.Running {
			continue
		}
		pids[cJSON.ID] = cJSON.State.Pid
	}

	return pids, nil
}
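// Illustrative sketch (not part of the original file): one common use of the
// PID map is deriving the network namespace path of each infra container.
// The /proc/<pid>/ns/net layout is standard procfs; exampleNetNsPaths itself
// is hypothetical.
func exampleNetNsPaths(pids map[string]int) map[string]string {
	paths := make(map[string]string, len(pids))
	for id, pid := range pids {
		paths[id] = fmt.Sprintf("/proc/%d/ns/net", pid)
	}
	return paths
}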
// finishIpvlanInit finishes configuring the ipvlan slave device of the given
// endpoint.
//
// Unfortunately, Docker libnetwork itself moves a netdev to the netns of a
// container after the Cilium libnetwork plugin driver has responded to a
// `JoinEndpoint` request. During the move, the netdev qdiscs get flushed by
// the kernel. Therefore, we need to configure the ipvlan slave device in two
// stages.
//
// Because the function can be called many times for the same container in
// parallel, we need to make the function idempotent. This is achieved by
// checking whether the datapath map has been pinned, which indicates a
// previous successful invocation of the function for the same container,
// before executing the configuration stages.
//
// FIXME: Because of the libnetwork limitation mentioned above, we cannot
// enforce policies for an ipvlan slave before a process of a container has
// started. So, this opens a window between the two stages during which ALL
// container traffic is allowed.
func finishIpvlanInit(ep *endpoint.Endpoint, netNsPath string) error {
	var ipvlanIface string

	if netNsPath == "" {
		return fmt.Errorf("netNsPath is empty")
	}

	// Just ignore the endpoint if it is dying
	if err := ep.LockAlive(); err != nil {
		return nil
	}
	defer ep.Unlock()

	if ep.IsDatapathMapPinnedLocked() {
		// The datapath map is pinned, which implies that the
		// post-initialization for the ipvlan slave has already been
		// performed successfully.
		return nil
	}

	// To access the netns, `/var/run/docker/netns` has to be bind mounted
	// into the cilium-agent container with the `rshared` option to prevent
	// the netns from leaking.
	netNs, err := ns.GetNS(netNsPath)
	if err != nil {
		return fmt.Errorf("Unable to open container netns %s: %s", netNsPath, err)
	}

	// Docker does not report which interfaces are used to connect to the
	// container network, so we need to scan all links to find the ipvlan
	// slave.
	err = netNs.Do(func(ns.NetNS) error {
		links, err := netlink.LinkList()
		if err != nil {
			return err
		}
		for _, link := range links {
			if link.Type() == "ipvlan" &&
				strings.HasPrefix(link.Attrs().Name,
					driver.ContainerInterfacePrefix) {
				ipvlanIface = link.Attrs().Name
				break
			}
		}
		if ipvlanIface == "" {
			return fmt.Errorf("ipvlan slave link not found")
		}
		return nil
	})
	if err != nil {
		return fmt.Errorf("Unable to find ipvlan slave in container netns: %s", err)
	}

	mapFD, mapID, err := connector.SetupIpvlanInRemoteNs(netNs,
		ipvlanIface, ipvlanIface)
	if err != nil {
		return fmt.Errorf("Unable to setup ipvlan slave: %s", err)
	}
	// Do not close the fd too early, as the subsequent pinning would
	// fail due to the map being removed by the kernel.
	defer func() {
		unix.Close(mapFD)
	}()

	if err = ep.SetDatapathMapIDAndPinMapLocked(mapID); err != nil {
		return fmt.Errorf("Unable to pin datapath map: %s", err)
	}

	return nil
}
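// Illustrative sketch (not part of the original file): because
// finishIpvlanInit is idempotent (it returns early once the datapath map is
// pinned), callers may safely retry it; exampleFinishIpvlanWithRetry is a
// hypothetical wrapper.
func exampleFinishIpvlanWithRetry(ep *endpoint.Endpoint, netNsPath string) error {
	var err error
	for try := 0; try < 3; try++ {
		if err = finishIpvlanInit(ep, netNsPath); err == nil {
			return nil
		}
		time.Sleep(100 * time.Millisecond)
	}
	return err
}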