github.com/fafucoder/cilium@v1.6.11/pkg/endpointmanager/manager.go

// Copyright 2016-2019 Authors of Cilium
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package endpointmanager

import (
	"context"
	"fmt"
	"net"
	"sync"
	"time"

	"github.com/cilium/cilium/pkg/completion"
	"github.com/cilium/cilium/pkg/endpoint"
	endpointid "github.com/cilium/cilium/pkg/endpoint/id"
	"github.com/cilium/cilium/pkg/endpoint/regeneration"
	"github.com/cilium/cilium/pkg/eventqueue"
	"github.com/cilium/cilium/pkg/lock"
	"github.com/cilium/cilium/pkg/logging"
	"github.com/cilium/cilium/pkg/logging/logfields"
	"github.com/cilium/cilium/pkg/metrics"
	monitorAPI "github.com/cilium/cilium/pkg/monitor/api"
	"github.com/cilium/cilium/pkg/option"
	"github.com/cilium/cilium/pkg/policy"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/sirupsen/logrus"
)

var (
	log = logging.DefaultLogger.WithField(logfields.LogSubsys, "endpoint-manager")

	// mutex protects endpoints and endpointsAux.
	mutex lock.RWMutex

	// endpoints is the global list of endpoints indexed by ID. mutex must
	// be held to read and write.
	endpoints    = map[uint16]*endpoint.Endpoint{}
	endpointsAux = map[string]*endpoint.Endpoint{}

	// EndpointSynchronizer updates external resources (e.g., Kubernetes) with
	// up-to-date information about endpoints managed by the endpoint manager.
	EndpointSynchronizer EndpointResourceSynchronizer
)

// EndpointResourceSynchronizer is an interface which synchronizes CiliumEndpoint
// resources with Kubernetes.
type EndpointResourceSynchronizer interface {
	RunK8sCiliumEndpointSync(ep *endpoint.Endpoint)
}

func init() {
	// EndpointCount is a function used to collect this metric. We cannot
	// increment/decrement a gauge since we invoke Remove gratuitously and that
	// would result in negative counts.
	// It must be thread-safe.
	metrics.EndpointCount = prometheus.NewGaugeFunc(prometheus.GaugeOpts{
		Namespace: metrics.Namespace,
		Name:      "endpoint_count",
		Help:      "Number of endpoints managed by this agent",
	},
		func() float64 { return float64(len(GetEndpoints())) },
	)
	metrics.MustRegister(metrics.EndpointCount)
}
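// exampleNoopSynchronizer is a minimal sketch, not part of the original file,
// showing how a caller could satisfy EndpointResourceSynchronizer when no
// Kubernetes apiserver is available (the type and function names are
// hypothetical). Assigning such a value to EndpointSynchronizer before Insert
// is called turns the synchronization step into a no-op.
type exampleNoopSynchronizer struct{}

// RunK8sCiliumEndpointSync implements EndpointResourceSynchronizer as a no-op.
func (exampleNoopSynchronizer) RunK8sCiliumEndpointSync(ep *endpoint.Endpoint) {}

// exampleDisableK8sSync wires the no-op synchronizer in during bootstrap.
func exampleDisableK8sSync() {
	EndpointSynchronizer = exampleNoopSynchronizer{}
}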
// waitForProxyCompletions blocks until all proxy changes have been completed.
func waitForProxyCompletions(proxyWaitGroup *completion.WaitGroup) error {
	err := proxyWaitGroup.Context().Err()
	if err != nil {
		return fmt.Errorf("context cancelled before waiting for proxy updates: %s", err)
	}

	start := time.Now()
	log.Debug("Waiting for proxy updates to complete...")
	err = proxyWaitGroup.Wait()
	if err != nil {
		return fmt.Errorf("proxy updates failed: %s", err)
	}
	log.Debug("Wait time for proxy updates: ", time.Since(start))

	return nil
}

// UpdatePolicyMaps returns a WaitGroup which is signaled once all endpoints
// have had their PolicyMaps updated against the endpoint's desired policy state.
func UpdatePolicyMaps(ctx context.Context) *sync.WaitGroup {
	var epWG sync.WaitGroup
	var wg sync.WaitGroup

	proxyWaitGroup := completion.NewWaitGroup(ctx)

	eps := GetEndpoints()
	epWG.Add(len(eps))
	wg.Add(1)

	// This is in a goroutine to allow the caller to proceed with other tasks
	// before waiting for the ACKs to complete.
	go func() {
		// Wait for all the eps to have applied policy map
		// changes before waiting for the changes to be ACKed.
		epWG.Wait()
		if err := waitForProxyCompletions(proxyWaitGroup); err != nil {
			log.WithError(err).Warning("Failed to apply L7 proxy policy changes. These will be re-applied in future updates.")
		}
		wg.Done()
	}()

	// TODO: bound by number of CPUs?
	for _, ep := range eps {
		go func(ep *endpoint.Endpoint) {
			if err := ep.ApplyPolicyMapChanges(proxyWaitGroup); err != nil {
				ep.Logger("endpointmanager").WithError(err).Warning("Failed to apply policy map changes. These will be re-applied in future updates.")
			}
			epWG.Done()
		}(ep)
	}

	return &wg
}
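// exampleWaitForPolicyMapUpdates is an illustrative sketch, not part of the
// original file, showing how a caller might consume the WaitGroup returned by
// UpdatePolicyMaps while bounding the wait with a timeout. The function name
// and the 30-second timeout are hypothetical.
func exampleWaitForPolicyMapUpdates(ctx context.Context) {
	ctx, cancel := context.WithTimeout(ctx, 30*time.Second)
	defer cancel()

	done := make(chan struct{})
	go func() {
		// The returned WaitGroup is signaled once all endpoints have applied
		// their PolicyMap changes and the proxy ACKs have been waited for.
		UpdatePolicyMaps(ctx).Wait()
		close(done)
	}()

	select {
	case <-done:
		log.Debug("All endpoint PolicyMaps updated")
	case <-ctx.Done():
		log.WithError(ctx.Err()).Warning("Timed out waiting for endpoint PolicyMap updates")
	}
}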
// Insert inserts the endpoint into the global maps.
func Insert(ep *endpoint.Endpoint) error {
	if ep.ID != 0 {
		if err := endpointid.Reuse(ep.ID); err != nil {
			return fmt.Errorf("unable to reuse endpoint ID: %s", err)
		}
	} else {
		id := endpointid.Allocate()
		if id == uint16(0) {
			return fmt.Errorf("no more endpoint IDs available")
		}
		ep.ID = id

		ep.UpdateLogger(map[string]interface{}{
			logfields.EndpointID: ep.ID,
		})
	}

	// No need to check liveness as an endpoint can only be deleted via the
	// API after it has been inserted into the manager.
	ep.UnconditionalRLock()
	mutex.Lock()

	ep.StartRegenerationFailureHandler()
	// Now that the endpoint has its ID, it can be created with a name based on
	// its ID, and its eventqueue can be safely started. Ensure that it is only
	// started once it is exposed to the endpointmanager so that it will be
	// stopped when the endpoint is removed from the endpointmanager.
	ep.EventQueue = eventqueue.NewEventQueueBuffered(fmt.Sprintf("endpoint-%d", ep.ID), option.Config.EndpointQueueSize)
	ep.EventQueue.Run()

	endpoints[ep.ID] = ep
	updateReferences(ep)

	mutex.Unlock()
	ep.RUnlock()

	if EndpointSynchronizer != nil {
		EndpointSynchronizer.RunK8sCiliumEndpointSync(ep)
	}

	ep.InsertEvent()

	return nil
}

// Lookup looks up the endpoint by prefixed ID.
func Lookup(id string) (*endpoint.Endpoint, error) {
	mutex.RLock()
	defer mutex.RUnlock()

	prefix, eid, err := endpointid.Parse(id)
	if err != nil {
		return nil, err
	}

	switch prefix {
	case endpointid.CiliumLocalIdPrefix:
		n, err := endpointid.ParseCiliumID(id)
		if err != nil {
			return nil, err
		}
		return lookupCiliumID(uint16(n)), nil

	case endpointid.CiliumGlobalIdPrefix:
		return nil, ErrUnsupportedID

	case endpointid.ContainerIdPrefix:
		return lookupContainerID(eid), nil

	case endpointid.DockerEndpointPrefix:
		return lookupDockerEndpoint(eid), nil

	case endpointid.ContainerNamePrefix:
		return lookupDockerContainerName(eid), nil

	case endpointid.PodNamePrefix:
		return lookupPodNameLocked(eid), nil

	case endpointid.IPv4Prefix:
		return lookupIPv4(eid), nil

	case endpointid.IPv6Prefix:
		return lookupIPv6(eid), nil

	default:
		return nil, ErrInvalidPrefix{InvalidPrefix: prefix.String()}
	}
}

// LookupCiliumID looks up an endpoint by endpoint ID.
func LookupCiliumID(id uint16) *endpoint.Endpoint {
	mutex.RLock()
	ep := lookupCiliumID(id)
	mutex.RUnlock()
	return ep
}

// LookupContainerID looks up an endpoint by Docker container ID.
func LookupContainerID(id string) *endpoint.Endpoint {
	mutex.RLock()
	ep := lookupContainerID(id)
	mutex.RUnlock()
	return ep
}

// LookupIPv4 looks up an endpoint by IPv4 address.
func LookupIPv4(ipv4 string) *endpoint.Endpoint {
	mutex.RLock()
	ep := lookupIPv4(ipv4)
	mutex.RUnlock()
	return ep
}

// LookupIPv6 looks up an endpoint by IPv6 address.
func LookupIPv6(ipv6 string) *endpoint.Endpoint {
	mutex.RLock()
	ep := lookupIPv6(ipv6)
	mutex.RUnlock()
	return ep
}

// LookupIP looks up an endpoint by IP address.
func LookupIP(ip net.IP) (ep *endpoint.Endpoint) {
	addr := ip.String()
	mutex.RLock()
	if ip.To4() != nil {
		ep = lookupIPv4(addr)
	} else {
		ep = lookupIPv6(addr)
	}
	mutex.RUnlock()
	return ep
}

// LookupPodName looks up an endpoint by namespace + pod name.
func LookupPodName(name string) *endpoint.Endpoint {
	mutex.RLock()
	ep := lookupPodNameLocked(name)
	mutex.RUnlock()
	return ep
}

// UpdateReferences makes an endpoint reachable via all of its reference
// fields (container ID, IPv4 address, ...).
// Must be called with ep.Mutex.RLock held.
func UpdateReferences(ep *endpoint.Endpoint) {
	mutex.Lock()
	defer mutex.Unlock()
	updateReferences(ep)
}
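// exampleLookupByPrefix is an illustrative sketch, not part of the original
// file, showing how the prefixed-ID string accepted by Lookup can be built
// with the endpointid helpers instead of by hand. The IPv4 address used here
// is hypothetical.
func exampleLookupByPrefix() (*endpoint.Endpoint, error) {
	return Lookup(endpointid.NewID(endpointid.IPv4Prefix, "10.11.12.13"))
}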
func releaseID(ep *endpoint.Endpoint) {
	if err := endpointid.Release(ep.ID); err != nil {
		// While restoring, endpoint IDs may not have been reused yet.
		// Failure to release means that the endpoint ID was not reused
		// yet.
		//
		// While the endpoint is disconnecting, the ID is already available in
		// the ID cache.
		//
		// Avoid irritating warning messages.
		state := ep.GetState()
		if state != endpoint.StateRestoring && state != endpoint.StateDisconnecting {
			log.WithError(err).WithField("state", state).Warning("Unable to release endpoint ID")
		}
	}
}

// WaitEndpointRemoved waits until all operations associated with Remove of
// the endpoint have been completed.
func WaitEndpointRemoved(ep *endpoint.Endpoint) {
	<-Remove(ep)
}

// Remove removes the endpoint from the global maps and releases the node-local
// ID allocated for the endpoint.
// Must be called with ep.Mutex.RLock held. Releasing of the ID of the endpoint
// is done asynchronously. Once the ID of the endpoint is released, the returned
// channel is closed.
func Remove(ep *endpoint.Endpoint) <-chan struct{} {

	epRemoved := make(chan struct{})

	mutex.Lock()
	defer mutex.Unlock()

	// This must be done before the ID is released for the endpoint!
	delete(endpoints, ep.ID)

	go func(ep *endpoint.Endpoint) {

		// The endpoint's EventQueue may not be stopped yet (depending on whether
		// the caller of the EventQueue has stopped it or not). Call it here
		// to be safe so that ep.WaitToBeDrained() does not hang forever.
		ep.EventQueue.Stop()

		// Wait for no more events (primarily regenerations) to be occurring for
		// this endpoint.
		ep.EventQueue.WaitToBeDrained()

		releaseID(ep)
		close(epRemoved)
	}(ep)

	if ep.ContainerID != "" {
		delete(endpointsAux, endpointid.NewID(endpointid.ContainerIdPrefix, ep.ContainerID))
	}

	if ep.DockerEndpointID != "" {
		delete(endpointsAux, endpointid.NewID(endpointid.DockerEndpointPrefix, ep.DockerEndpointID))
	}

	if ep.IPv4.IsSet() {
		delete(endpointsAux, endpointid.NewID(endpointid.IPv4Prefix, ep.IPv4.String()))
	}

	if ep.IPv6.IsSet() {
		delete(endpointsAux, endpointid.NewID(endpointid.IPv6Prefix, ep.IPv6.String()))
	}

	if ep.ContainerName != "" {
		delete(endpointsAux, endpointid.NewID(endpointid.ContainerNamePrefix, ep.ContainerName))
	}

	if podName := ep.GetK8sNamespaceAndPodNameLocked(); podName != "" {
		delete(endpointsAux, endpointid.NewID(endpointid.PodNamePrefix, podName))
	}
	return epRemoved
}
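// exampleRemoveAndWait is an illustrative sketch, not part of the original
// file, of the locking discipline documented on Remove: the endpoint's own
// lock is held read-only around the call, and the returned channel is consumed
// afterwards so the caller knows the node-local ID has been released.
// WaitEndpointRemoved combines these steps for callers that do not need to do
// any work in between.
func exampleRemoveAndWait(ep *endpoint.Endpoint) {
	ep.UnconditionalRLock()
	epRemoved := Remove(ep)
	ep.RUnlock()
	<-epRemoved
}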
// RemoveAll removes all endpoints from the global maps.
func RemoveAll() {
	mutex.Lock()
	defer mutex.Unlock()
	endpointid.ReallocatePool()
	endpoints = map[uint16]*endpoint.Endpoint{}
	endpointsAux = map[string]*endpoint.Endpoint{}
}

// lookupCiliumID looks up an endpoint by endpoint ID.
func lookupCiliumID(id uint16) *endpoint.Endpoint {
	if ep, ok := endpoints[id]; ok {
		return ep
	}
	return nil
}

func lookupDockerEndpoint(id string) *endpoint.Endpoint {
	if ep, ok := endpointsAux[endpointid.NewID(endpointid.DockerEndpointPrefix, id)]; ok {
		return ep
	}
	return nil
}

func lookupPodNameLocked(name string) *endpoint.Endpoint {
	if ep, ok := endpointsAux[endpointid.NewID(endpointid.PodNamePrefix, name)]; ok {
		return ep
	}
	return nil
}

func lookupDockerContainerName(name string) *endpoint.Endpoint {
	if ep, ok := endpointsAux[endpointid.NewID(endpointid.ContainerNamePrefix, name)]; ok {
		return ep
	}
	return nil
}

func lookupIPv4(ipv4 string) *endpoint.Endpoint {
	if ep, ok := endpointsAux[endpointid.NewID(endpointid.IPv4Prefix, ipv4)]; ok {
		return ep
	}
	return nil
}

func lookupIPv6(ipv6 string) *endpoint.Endpoint {
	if ep, ok := endpointsAux[endpointid.NewID(endpointid.IPv6Prefix, ipv6)]; ok {
		return ep
	}
	return nil
}

func lookupContainerID(id string) *endpoint.Endpoint {
	if ep, ok := endpointsAux[endpointid.NewID(endpointid.ContainerIdPrefix, id)]; ok {
		return ep
	}
	return nil
}

// updateReferences updates the mappings of various values to their
// corresponding endpoints, such as container ID, Docker container name,
// pod name, etc.
func updateReferences(ep *endpoint.Endpoint) {
	if ep.ContainerID != "" {
		endpointsAux[endpointid.NewID(endpointid.ContainerIdPrefix, ep.ContainerID)] = ep
	}

	if ep.DockerEndpointID != "" {
		endpointsAux[endpointid.NewID(endpointid.DockerEndpointPrefix, ep.DockerEndpointID)] = ep
	}

	if ep.IPv4.IsSet() {
		endpointsAux[endpointid.NewID(endpointid.IPv4Prefix, ep.IPv4.String())] = ep
	}

	if ep.IPv6.IsSet() {
		endpointsAux[endpointid.NewID(endpointid.IPv6Prefix, ep.IPv6.String())] = ep
	}

	if ep.ContainerName != "" {
		endpointsAux[endpointid.NewID(endpointid.ContainerNamePrefix, ep.ContainerName)] = ep
	}

	if podName := ep.GetK8sNamespaceAndPodNameLocked(); podName != "" {
		endpointsAux[endpointid.NewID(endpointid.PodNamePrefix, podName)] = ep
	}
}

// RegenerateAllEndpoints calls SetStateLocked for each endpoint and regenerates
// it if the state transition is valid. During this process, the endpoint list
// is locked and cannot be modified.
// Returns a wait group that can be used to know when all the endpoints are
// regenerated.
func RegenerateAllEndpoints(regenMetadata *regeneration.ExternalRegenerationMetadata) *sync.WaitGroup {
	var wg sync.WaitGroup

	eps := GetEndpoints()
	wg.Add(len(eps))

	// Dereference the "reason" field outside of the logging statement; see
	// https://github.com/sirupsen/logrus/issues/1003.
	reason := regenMetadata.Reason
	log.WithFields(logrus.Fields{"reason": reason}).Info("regenerating all endpoints")
	for _, ep := range eps {
		go func(ep *endpoint.Endpoint) {
			<-ep.RegenerateIfAlive(regenMetadata)
			wg.Done()
		}(ep)
	}

	return &wg
}
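// exampleRegenerateAll is an illustrative sketch, not part of the original
// file, showing how a caller might trigger a full regeneration and block until
// it finishes. The Reason string is hypothetical.
func exampleRegenerateAll() {
	wg := RegenerateAllEndpoints(&regeneration.ExternalRegenerationMetadata{
		Reason: "example: datapath configuration changed",
	})
	wg.Wait()
}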
// HasGlobalCT returns true if any endpoint uses a global conntrack table,
// false otherwise.
func HasGlobalCT() bool {
	eps := GetEndpoints()
	for _, e := range eps {
		if !e.Options.IsEnabled(option.ConntrackLocal) {
			return true
		}
	}
	return false
}

// GetEndpoints returns a slice of all endpoints present in the endpoint manager.
func GetEndpoints() []*endpoint.Endpoint {
	mutex.RLock()
	eps := make([]*endpoint.Endpoint, 0, len(endpoints))
	for _, ep := range endpoints {
		eps = append(eps, ep)
	}
	mutex.RUnlock()
	return eps
}

// GetPolicyEndpoints returns a map of all endpoints present in the endpoint
// manager, keyed by the policy.Endpoint interface.
func GetPolicyEndpoints() map[policy.Endpoint]struct{} {
	mutex.RLock()
	eps := make(map[policy.Endpoint]struct{}, len(endpoints))
	for _, ep := range endpoints {
		eps[ep] = struct{}{}
	}
	mutex.RUnlock()
	return eps
}

// AddEndpoint takes the prepared endpoint object and starts managing it.
func AddEndpoint(owner regeneration.Owner, ep *endpoint.Endpoint, reason string) (err error) {
	alwaysEnforce := policy.GetPolicyEnabled() == option.AlwaysEnforce
	ep.SetDesiredIngressPolicyEnabled(alwaysEnforce)
	ep.SetDesiredEgressPolicyEnabled(alwaysEnforce)

	if ep.ID != 0 {
		return fmt.Errorf("Endpoint ID is already set to %d", ep.ID)
	}
	err = Insert(ep)
	if err != nil {
		return err
	}

	repr, err := monitorAPI.EndpointCreateRepr(ep)
	// Only send the endpoint creation notification if the representation
	// could be built; otherwise skip it silently.
	if err == nil {
		owner.SendNotification(monitorAPI.AgentNotifyEndpointCreated, repr)
	}
	return nil
}

// WaitForEndpointsAtPolicyRev waits for all endpoints which existed at the time
// this function is called to reach the given policy revision.
// New endpoints appearing while waiting are ignored.
func WaitForEndpointsAtPolicyRev(ctx context.Context, rev uint64) error {
	eps := GetEndpoints()
	for i := range eps {
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-eps[i].WaitForPolicyRevision(ctx, rev, nil):
			if ctx.Err() != nil {
				return ctx.Err()
			}
		}
	}
	return nil
}

// CallbackForEndpointsAtPolicyRev registers a callback on all endpoints that
// exist when invoked. It is similar to WaitForEndpointsAtPolicyRev, but each
// endpoint that reaches the desired revision calls 'done' independently.
// The provided callback should not block and should generally be lightweight.
func CallbackForEndpointsAtPolicyRev(ctx context.Context, rev uint64, done func(time.Time)) error {
	eps := GetEndpoints()
	for i := range eps {
		eps[i].WaitForPolicyRevision(ctx, rev, done)
	}
	return nil
}
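// exampleWaitForPolicyRev is an illustrative sketch, not part of the original
// file, showing how a caller might bound WaitForEndpointsAtPolicyRev with a
// timeout. The 90-second timeout is hypothetical.
func exampleWaitForPolicyRev(rev uint64) error {
	ctx, cancel := context.WithTimeout(context.Background(), 90*time.Second)
	defer cancel()
	return WaitForEndpointsAtPolicyRev(ctx, rev)
}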