github.phpd.cn/cilium/cilium@v1.6.12/pkg/workloads/cri.go (about) 1 // Copyright 2018-2019 Authors of Cilium 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // 15 package workloads 16 17 import ( 18 "context" 19 "errors" 20 "fmt" 21 "net" 22 "time" 23 24 "github.com/cilium/cilium/api/v1/models" 25 "github.com/cilium/cilium/common/addressing" 26 "github.com/cilium/cilium/pkg/endpoint" 27 endpointid "github.com/cilium/cilium/pkg/endpoint/id" 28 "github.com/cilium/cilium/pkg/endpointmanager" 29 "github.com/cilium/cilium/pkg/logging/logfields" 30 31 "github.com/containerd/containerd/namespaces" 32 criStore "github.com/containerd/cri/pkg/store" 33 "github.com/sirupsen/logrus" 34 "google.golang.org/grpc" 35 criRuntime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2" 36 "k8s.io/kubernetes/pkg/kubelet/util" 37 ) 38 39 func getGRPCCLient(ctx context.Context) (*grpc.ClientConn, error) { 40 ep, ok := ctx.Value(EpOpt).(string) 41 if !ok { 42 return nil, fmt.Errorf("unknown runtime endpoint") 43 } 44 log.Debugf("using CRI endpoint %s", ep) 45 addr, dialer, err := util.GetAddressAndDialer(ep) 46 if err != nil { 47 return nil, err 48 } 49 50 c, cancel := context.WithTimeout(ctx, time.Duration(5*time.Second)) 51 defer cancel() 52 53 conn, err := grpc.DialContext(c, addr, grpc.WithDialer(dialer), grpc.WithInsecure(), grpc.WithBackoffMaxDelay(15*time.Second)) 54 if err != nil { 55 return nil, fmt.Errorf("failed to connect: %s", err) 56 } 57 return conn, nil 58 } 59 60 type criClient struct { 61 criRuntime.RuntimeServiceClient 62 } 63 64 func newCRIClient(ctx context.Context) (*criClient, error) { 65 cc, err := getGRPCCLient(ctx) 66 if err != nil { 67 return nil, err 68 } 69 rsc := criRuntime.NewRuntimeServiceClient(cc) 70 return &criClient{rsc}, nil 71 } 72 73 // IsRunning returns false if the provided endpoint cannot be associated with a 74 // running workload. The runtime must be reachable to make this decision. 75 func (c *criClient) IsRunning(ep *endpoint.Endpoint) bool { 76 if c == nil { 77 return false 78 } 79 80 podID := ep.GetContainerID() 81 82 if podID == "" { 83 return false 84 } 85 86 pssr := criRuntime.PodSandboxStatusRequest{ 87 PodSandboxId: podID, 88 } 89 ns := namespaces.WithNamespace(context.Background(), k8sContainerdNamespace) 90 cont, err := c.RuntimeServiceClient.PodSandboxStatus(ns, &pssr) 91 if err == criStore.ErrNotExist { 92 return false 93 } 94 if err != nil { 95 log.WithError(err).Debugf("unable to get pod sandbox status of %v", pssr) 96 return false 97 } 98 99 return cont.GetStatus().State == criRuntime.PodSandboxState_SANDBOX_READY 100 } 101 102 // Status returns the status of the workload runtime 103 func (c *criClient) Status() *models.Status { 104 if c == nil { 105 return workloadStatusDisabled 106 } 107 108 sreq := &criRuntime.StatusRequest{ 109 Verbose: false, 110 } 111 sresp, err := c.RuntimeServiceClient.Status(context.Background(), sreq) 112 if err != nil { 113 return &models.Status{State: models.StatusStateFailure, Msg: err.Error()} 114 } 115 for _, runtimeCondition := range sresp.Status.Conditions { 116 if !runtimeCondition.Status { 117 return &models.Status{State: models.StatusStateFailure, Msg: runtimeCondition.Message} 118 } 119 120 } 121 return &models.Status{State: models.StatusStateOk, Msg: "cri daemon: Ok"} 122 } 123 124 // EnableEventListener watches for containerD events. Performs the plumbing for the 125 // containers started or dead. 126 func (c *criClient) EnableEventListener() (chan<- *EventMessage, error) { 127 if c == nil { 128 log.Debug("Not enabling CRI event listener because CRI client is nil") 129 return nil, nil 130 } 131 log.Info("Enabling CRI event listener") 132 133 ws := newWatcherState() 134 135 eventsCh := make(chan *EventMessage, 100) 136 go func(state *watcherState, eventsCh <-chan *EventMessage) { 137 for event := range eventsCh { 138 ws.enqueueByContainerID(event.WorkloadID, event) 139 } 140 }(ws, eventsCh) 141 return eventsCh, nil 142 } 143 144 func (c *criClient) processEvent(m EventMessage) { 145 switch m.EventType { 146 case EventTypeStart: 147 req := &criRuntime.PodSandboxStatusRequest{ 148 PodSandboxId: m.WorkloadID, 149 } 150 _, err := c.PodSandboxStatus(context.Background(), req) 151 if err != nil { 152 // ignore containers if not found 153 // startIgnoringContainer(m.WorkloadID) 154 log.WithError(err).Debugf("Unable to get more details for workload %s", m.WorkloadID) 155 return 156 } 157 stopIgnoringContainer(m.WorkloadID) 158 c.handleCreateWorkload(m.WorkloadID, true) 159 case EventTypeDelete: 160 Owner().DeleteEndpoint(endpointid.NewID(endpointid.ContainerIdPrefix, m.WorkloadID)) 161 } 162 } 163 164 func (c *criClient) processEvents(events chan EventMessage) { 165 for m := range events { 166 if m.WorkloadID != "" { 167 log.WithFields(logrus.Fields{ 168 logfields.ContainerID: shortContainerID(m.WorkloadID), 169 }).Debug("Processing event for Container") 170 c.processEvent(m) 171 } 172 } 173 } 174 175 func (c *criClient) getEndpointByPodIP(pod *criRuntime.PodSandboxStatus) *endpoint.Endpoint { 176 scopedLog := log.WithField(logfields.ContainerID, shortContainerID(pod.GetId())) 177 178 if ciliumIP := c.getCiliumIP(pod); ciliumIP != nil { 179 id := endpointid.NewIPPrefixID(ciliumIP.IP()) 180 if ep, err := endpointmanager.Lookup(id); err != nil { 181 log.WithError(err).Warning("Unable to lookup endpoint by IP prefix") 182 } else if ep != nil { 183 return ep 184 } 185 } 186 187 scopedLog.Debug("IP address assigned by Cilium could not be derived from pod") 188 return nil 189 } 190 191 func (c *criClient) getCiliumIP(pod *criRuntime.PodSandboxStatus) addressing.CiliumIP { 192 ip := net.ParseIP(pod.GetNetwork().GetIp()) 193 if ip == nil { 194 return nil 195 } 196 if ip.To4() == nil { 197 return addressing.DeriveCiliumIPv4(ip) 198 } 199 return addressing.DeriveCiliumIPv6(ip) 200 } 201 202 func (c *criClient) handleCreateWorkload(id string, retry bool) { 203 scopedLog := log.WithFields(logrus.Fields{ 204 logfields.ContainerID: shortContainerID(id), 205 fieldMaxRetry: EndpointCorrelationMaxRetries, 206 "willRetry": retry, 207 }) 208 209 for try := 1; try <= EndpointCorrelationMaxRetries; try++ { 210 retryLog := scopedLog.WithFields(logrus.Fields{ 211 "retry": try, 212 "podID": id, 213 }) 214 215 if try > 1 { 216 if retry { 217 retryLog.Debug("Waiting for endpoint representing pod to appear") 218 time.Sleep(EndpointCorrelationSleepTime(try)) 219 } else { 220 break 221 } 222 } 223 224 ctx := namespaces.WithNamespace(context.Background(), k8sContainerdNamespace) 225 pssr := criRuntime.PodSandboxStatusRequest{ 226 PodSandboxId: id, 227 } 228 cont, err := c.RuntimeServiceClient.PodSandboxStatus(ctx, &pssr) 229 if err != nil { 230 retryLog.WithError(err).Debugf("Unable to inspect pod %s after pod create event", id) 231 continue 232 } 233 234 pod := cont.GetStatus() 235 if pod.GetId() == "" { 236 retryLog.Warn("Container name not set in event from containerD") 237 } 238 239 ep := endpointmanager.LookupContainerID(id) 240 if ep == nil { 241 // Container ID is not yet known; try and find endpoint 242 // via one of the IP addresses assigned. 243 ep = c.getEndpointByPodIP(pod) 244 } 245 246 if ep == nil { 247 // Endpoint does not exist yet. This indicates that the 248 // orchestration system has not requested us to handle 249 // networking for this pod yet (or never will). 250 // We will retry a couple of times to wait for this to 251 // happen. 252 retryLog.Debug("Container event could not be associated with endpoint yet") 253 continue 254 } 255 256 retryLog.WithFields(logrus.Fields{ 257 logfields.EndpointID: ep.ID, 258 }).Debug("Associated container event with endpoint") 259 260 processCreateWorkload(ep, id, pod.Labels) 261 return 262 } 263 264 startIgnoringContainer(id) 265 266 scopedLog.Info("No request received to manage networking for container") 267 } 268 269 // IgnoreRunningWorkloads checks for already running containers and checks 270 // their IP address, then adds the containers to the list of ignored containers 271 // and allocates the IPs they are using to prevent future collisions. 272 func (c *criClient) IgnoreRunningWorkloads() { 273 if c == nil { 274 return 275 } 276 277 req := &criRuntime.ListPodSandboxRequest{} 278 279 resp, err := c.RuntimeServiceClient.ListPodSandbox(context.Background(), req) 280 if err != nil { 281 log.WithError(err).Error("unable to get list of pods running") 282 return 283 } 284 for _, pod := range resp.GetItems() { 285 scopedLog := log.WithField(logfields.ContainerID, pod.GetId()) 286 scopedLog.Info("Adding running container to the list of ignored containers") 287 startIgnoringContainer(pod.GetId()) 288 ctx := namespaces.WithNamespace(context.Background(), k8sContainerdNamespace) 289 pssr := criRuntime.PodSandboxStatusRequest{ 290 PodSandboxId: pod.GetId(), 291 } 292 cont, err := c.RuntimeServiceClient.PodSandboxStatus(ctx, &pssr) 293 if err != nil { 294 continue 295 } 296 cIP := c.getCiliumIP(cont.GetStatus()) 297 if cIP == nil { 298 continue 299 } 300 allocator.BlacklistIP(cIP.IP(), "ignored container: "+pod.GetId()) 301 //TODO Release this address when the ignored container leaves 302 scopedLog.WithFields(logrus.Fields{ 303 logfields.IPAddr: cIP.IP(), 304 }).Info("Found container running with potential " + 305 "collision IP address, adding to the list " + 306 "of allocated IPs") 307 } 308 } 309 310 // workloadIDsList returns a list of running workload IDs. 311 func (c *criClient) workloadIDsList(ctx context.Context) ([]string, error) { 312 ctx = namespaces.WithNamespace(ctx, k8sContainerdNamespace) 313 req := &criRuntime.ListPodSandboxRequest{} 314 resp, err := c.RuntimeServiceClient.ListPodSandbox(ctx, req) 315 if err != nil { 316 return nil, err 317 } 318 319 cont := []string{} 320 for _, pod := range resp.GetItems() { 321 if pod.GetId() != "" { 322 cont = append(cont, pod.GetId()) 323 } 324 } 325 return cont, nil 326 } 327 328 // GetAllInfraContainersPID returns a map that maps container IDs to the PID 329 // of that container. 330 func (c *criClient) GetAllInfraContainersPID() (map[string]int, error) { 331 return nil, errors.New("not implemented for cri") 332 }