github.com/cilium/cilium@v1.16.2/pkg/endpoint/events.go

// SPDX-License-Identifier: Apache-2.0
// Copyright Authors of Cilium

package endpoint

import (
	"context"
	"errors"
	"fmt"
	"strconv"

	"github.com/sirupsen/logrus"

	"github.com/cilium/cilium/pkg/datapath/linux/bandwidth"
	datapath "github.com/cilium/cilium/pkg/datapath/types"
	"github.com/cilium/cilium/pkg/eventqueue"
	"github.com/cilium/cilium/pkg/logging/logfields"
	"github.com/cilium/cilium/pkg/option"
	"github.com/cilium/cilium/pkg/policy"
)

// EndpointRegenerationEvent contains all fields necessary to regenerate an endpoint.
type EndpointRegenerationEvent struct {
	regenContext *regenerationContext
	ep           *Endpoint
}

// Handle handles the regeneration event for the endpoint.
func (ev *EndpointRegenerationEvent) Handle(res chan interface{}) {
	e := ev.ep
	regenContext := ev.regenContext

	err := e.rlockAlive()
	if err != nil {
		e.logDisconnectedMutexAction(err, "before regeneration")
		res <- &EndpointRegenerationResult{
			err: err,
		}

		return
	}
	e.runlock()

	// We should only queue the request after we are done with all of the
	// endpoint's lock/unlock operations. Otherwise this can deadlock if the
	// endpoint is being deleted at the same time. More info PR-1777.
	doneFunc, err := e.owner.QueueEndpointBuild(regenContext.parentContext, uint64(e.ID))
	if err != nil {
		if !errors.Is(err, context.Canceled) {
			e.getLogger().WithError(err).Warning("unable to queue endpoint build")
		}
	} else if doneFunc != nil {
		e.getLogger().Debug("Dequeued endpoint from build queue")

		regenContext.DoneFunc = doneFunc

		err = ev.ep.regenerate(ev.regenContext)

		doneFunc()
		e.notifyEndpointRegeneration(err)
	} else {
		// If another build has been queued for the endpoint, that build will
		// be able to take care of all of the work needed to regenerate the
		// endpoint at this point in time; queueing another build is a waste
		// of resources.
		e.getLogger().Debug("build not queued for endpoint because another build has already been queued")
	}

	res <- &EndpointRegenerationResult{
		err: err,
	}
}

// EndpointRegenerationResult contains the results of an endpoint regeneration.
type EndpointRegenerationResult struct {
	err error
}

// EndpointRevisionBumpEvent contains all fields necessary to bump the policy
// revision of a given endpoint.
type EndpointRevisionBumpEvent struct {
	Rev uint64
	ep  *Endpoint
}

// Handle handles the revision bump event for the Endpoint.
func (ev *EndpointRevisionBumpEvent) Handle(res chan interface{}) {
	// TODO: if the endpoint is not in a 'ready' state that means that
	//       we cannot set the policy revision, as something else has
	//       changed endpoint state which necessitates regeneration,
	//       *or* the endpoint is in a not-ready state (i.e., a prior
	//       regeneration failed), so there is no way that we can
	//       realize the policy revision yet. Should this be signaled
	//       to the routine waiting for the result of this event?
	ev.ep.SetPolicyRevision(ev.Rev)
	res <- struct{}{}
}

// PolicyRevisionBumpEvent queues an event for the given endpoint to set its
// realized policy revision to rev. This may block depending on whether events
// have been queued up for the given endpoint. It blocks until the event has
// succeeded or has been cancelled.
func (e *Endpoint) PolicyRevisionBumpEvent(rev uint64) {
	epBumpEvent := eventqueue.NewEvent(&EndpointRevisionBumpEvent{Rev: rev, ep: e})
	// Don't check policy revision event results - it is best effort.
	_, err := e.eventQueue.Enqueue(epBumpEvent)
	if err != nil {
		log.WithFields(logrus.Fields{
			logfields.PolicyRevision: rev,
			logfields.EndpointID:     e.ID,
		}).Errorf("enqueue of EndpointRevisionBumpEvent failed: %s", err)
	}
}

// EndpointNoTrackEvent contains all fields necessary to update the NOTRACK rules.
type EndpointNoTrackEvent struct {
	ep     *Endpoint
	annoCB AnnotationsResolverCB
}

// Handle handles the NOTRACK rule update.
func (ev *EndpointNoTrackEvent) Handle(res chan interface{}) {
	var port uint16

	e := ev.ep

	// If this endpoint is going away, nothing to do.
	if err := e.lockAlive(); err != nil {
		res <- &EndpointRegenerationResult{
			err: nil,
		}
		return
	}

	defer e.unlock()

	portStr, err := ev.annoCB(e.K8sNamespace, e.K8sPodName)
	if err != nil {
		res <- &EndpointRegenerationResult{
			err: err,
		}
		return
	}

	if portStr == "" {
		port = 0
	} else {
		// Validate annotation before we do any actual alteration to the endpoint.
		p64, err := strconv.ParseUint(portStr, 10, 16)
		// Port should be within [1-65535].
		if err != nil || p64 == 0 {
			res <- &EndpointRegenerationResult{
				err: err,
			}
			return
		}
		port = uint16(p64)
	}

	if port != e.noTrackPort {
		log.Debug("Updating NOTRACK rules")
		if e.IPv4.IsValid() {
			if port > 0 {
				e.owner.Datapath().InstallNoTrackRules(e.IPv4, port)
			}
			if e.noTrackPort > 0 {
				e.owner.Datapath().RemoveNoTrackRules(e.IPv4, e.noTrackPort)
			}
		}
		if e.IPv6.IsValid() {
			if port > 0 {
				e.owner.Datapath().InstallNoTrackRules(e.IPv6, port)
			}
			if e.noTrackPort > 0 {
				e.owner.Datapath().RemoveNoTrackRules(e.IPv6, e.noTrackPort)
			}
		}
		e.noTrackPort = port
	}

	res <- &EndpointRegenerationResult{
		err: nil,
	}
}

// EndpointPolicyVisibilityEvent contains all fields necessary to update the
// visibility policy.
type EndpointPolicyVisibilityEvent struct {
	ep     *Endpoint
	annoCB AnnotationsResolverCB
}

// Handle handles the policy visibility update.
func (ev *EndpointPolicyVisibilityEvent) Handle(res chan interface{}) {
	e := ev.ep

	if err := e.lockAlive(); err != nil {
		// If the endpoint is being deleted, we don't need to update its
		// visibility policy.
		res <- &EndpointRegenerationResult{
			err: nil,
		}
		return
	}

	defer func() {
		// Ensure that policy computation is performed so that endpoint
		// desiredPolicy and realizedPolicy pointers are different. This state
		// is needed to update endpoint policy maps with the policy map state
		// generated from the visibility policy. This can, and should be more
		// elegant in the future.
		e.forcePolicyComputation()
		e.unlock()
	}()

	var (
		nvp *policy.VisibilityPolicy
		err error
	)

	proxyVisibility, err := ev.annoCB(e.K8sNamespace, e.K8sPodName)
	if err != nil {
		res <- &EndpointRegenerationResult{
			err: err,
		}
		return
	}
	if proxyVisibility != "" {
		if e.IsProxyDisabled() {
			e.getLogger().
				WithField(logfields.EndpointID, e.GetID()).
				Warn("ignoring L7 proxy visibility policy as L7 proxy is disabled")
			res <- &EndpointRegenerationResult{
				err: nil,
			}
			return
		}
		e.getLogger().Debug("creating visibility policy")
		nvp, err = policy.NewVisibilityPolicy(proxyVisibility, e.K8sNamespace, e.K8sPodName)
		if err != nil {
			e.getLogger().WithError(err).Warning("unable to parse annotations into visibility policy; disabling visibility policy for endpoint")
			e.visibilityPolicy = &policy.VisibilityPolicy{
				Ingress: make(policy.DirectionalVisibilityPolicy),
				Egress:  make(policy.DirectionalVisibilityPolicy),
				Error:   err,
			}
			res <- &EndpointRegenerationResult{
				err: nil,
			}
			return
		}
	}

	e.visibilityPolicy = nvp
	res <- &EndpointRegenerationResult{
		err: nil,
	}
}

// EndpointPolicyBandwidthEvent contains all fields necessary to update
// the Pod's bandwidth policy.
type EndpointPolicyBandwidthEvent struct {
	bwm    datapath.BandwidthManager
	ep     *Endpoint
	annoCB AnnotationsResolverCB
}

// Handle handles the policy bandwidth update.
func (ev *EndpointPolicyBandwidthEvent) Handle(res chan interface{}) {
	var bps uint64

	if !ev.bwm.Enabled() {
		res <- &EndpointRegenerationResult{
			err: nil,
		}
		return
	}

	e := ev.ep
	if err := e.lockAlive(); err != nil {
		// If the endpoint is being deleted, we don't need to
		// update its bandwidth policy.
		res <- &EndpointRegenerationResult{
			err: nil,
		}
		return
	}
	defer func() {
		e.unlock()
	}()

	bandwidthEgress, err := ev.annoCB(e.K8sNamespace, e.K8sPodName)
	if err != nil {
		res <- &EndpointRegenerationResult{
			err: err,
		}
		return
	}
	if bandwidthEgress != "" {
		bps, err = bandwidth.GetBytesPerSec(bandwidthEgress)
		if err == nil {
			ev.bwm.UpdateBandwidthLimit(e.ID, bps)
		} else {
			e.getLogger().WithError(err).Debugf("failed to parse bandwidth limit %q", bandwidthEgress)
		}
	} else {
		ev.bwm.DeleteBandwidthLimit(e.ID)
	}
	if err != nil {
		res <- &EndpointRegenerationResult{
			err: err,
		}
		return
	}

	bpsOld := "inf"
	bpsNew := "inf"
	if e.bps != 0 {
		bpsOld = strconv.FormatUint(e.bps, 10)
	}
	if bps != 0 {
		bpsNew = strconv.FormatUint(bps, 10)
	}
	e.getLogger().Debugf("Updating %s from %s to %s bytes/sec", bandwidth.EgressBandwidth,
		bpsOld, bpsNew)
	e.bps = bps
	res <- &EndpointRegenerationResult{
		err: nil,
	}
}

// InitEventQueue initializes the endpoint's event queue. Note that this
// function does not begin processing events off the queue; that is left to
// the caller, who must call Expose in order to allow other subsystems to
// access the endpoint. This function assumes that the endpoint ID has already
// been allocated!
//
// Having this be a separate function allows us to prepare
// the event queue while the endpoint is being validated (during restoration)
// so that when its metadata is resolved, events can be enqueued (such as
// visibility policy and bandwidth policy).
func (e *Endpoint) InitEventQueue() {
	e.eventQueue = eventqueue.NewEventQueueBuffered(fmt.Sprintf("endpoint-%d", e.ID), option.Config.EndpointQueueSize)
}

// Start assigns a Cilium Endpoint ID to the endpoint and prepares it to
// receive events from other subsystems.
//
// The endpoint must not already be exposed via the endpointmanager prior to
// calling Start(), as it assumes unconditional access over the Endpoint
// object.
func (e *Endpoint) Start(id uint16) {
	// No need to check liveness as an endpoint can only be deleted via the
	// API after it has been inserted into the manager.
	// 'e.ID' written below, read lock is not enough.
	e.unconditionalLock()
	defer e.unlock()

	e.ID = id
	e.UpdateLogger(map[string]interface{}{
		logfields.EndpointID: e.ID,
	})

	// Start goroutines that are responsible for handling events.
	e.startRegenerationFailureHandler()
	if e.eventQueue == nil {
		e.InitEventQueue()
	}
	e.eventQueue.Run()
	e.getLogger().Info("New endpoint")
}

// Stop cleans up all goroutines managed by this endpoint (EventQueue,
// Controllers).
// This function should be used directly in cleanup functions which aim to stop
// goroutines managed by this endpoint, but without removing BPF maps and
// datapath state (for instance, because the daemon is shutting down but the
// endpoint should remain operational while the daemon is not running).
func (e *Endpoint) Stop() {
	// Since the endpoint is being deleted, we no longer need to run events
	// in its event queue. This is a no-op if the queue has already been
	// closed elsewhere.
	e.eventQueue.Stop()

	// Cancel active controllers for the endpoint tied to e.aliveCtx.
	// Needs to be performed before draining the event queue to allow
	// in-flight functions to act before the Endpoint's underlying resources
	// are removed by the container runtime.
	e.aliveCancel()

	// Wait for the queue to be drained in case an event which is currently
	// running for the endpoint tries to acquire the lock - we cannot be sure
	// what types of events will be pushed onto the EventQueue for an endpoint
	// and when they will happen. After this point, no events for the endpoint
	// will be processed on its EventQueue, specifically regenerations.
	e.eventQueue.WaitToBeDrained()

	// Given that we are deleting the endpoint and that no more builds are
	// going to occur for this endpoint, close the channel which signals whether
	// the endpoint has its BPF program compiled or not to avoid it persisting
	// if anything is blocking on it. If a delete request has already been
	// enqueued for this endpoint, this is a no-op.
	e.closeBPFProgramChannel()
}
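
// enqueueNoTrackUpdate is a minimal, hypothetical sketch (not part of the
// upstream file) of how the events above are typically driven: wrap the event
// with eventqueue.NewEvent, enqueue it on the endpoint's event queue, and wait
// for the *EndpointRegenerationResult that the handler publishes on the
// returned channel. The helper name and its use of an annotation resolver
// callback are assumptions made for illustration only.
func (e *Endpoint) enqueueNoTrackUpdate(annoCB AnnotationsResolverCB) error {
	ev := eventqueue.NewEvent(&EndpointNoTrackEvent{ep: e, annoCB: annoCB})
	resChan, err := e.eventQueue.Enqueue(ev)
	if err != nil {
		return fmt.Errorf("enqueue of EndpointNoTrackEvent failed: %w", err)
	}
	// Block until the handler has run. If the queue is stopped before the
	// event is processed, the channel is closed without a value and the type
	// assertion below fails, in which case there is no result to report.
	result, ok := (<-resChan).(*EndpointRegenerationResult)
	if !ok {
		return nil
	}
	return result.err
}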