istio.io/istio@v0.0.0-20240520182934-d79c90f27776/pilot/pkg/autoregistration/controller.go (about) 1 // Copyright Istio Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package autoregistration 16 17 import ( 18 "context" 19 "fmt" 20 "math" 21 "strings" 22 "time" 23 24 "golang.org/x/time/rate" 25 "google.golang.org/grpc/codes" 26 grpcstatus "google.golang.org/grpc/status" 27 "k8s.io/apimachinery/pkg/api/errors" 28 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 29 kubetypes "k8s.io/apimachinery/pkg/types" 30 31 "istio.io/api/annotation" 32 "istio.io/api/networking/v1alpha3" 33 "istio.io/istio/pilot/pkg/autoregistration/internal/health" 34 "istio.io/istio/pilot/pkg/autoregistration/internal/state" 35 "istio.io/istio/pilot/pkg/features" 36 "istio.io/istio/pilot/pkg/model" 37 "istio.io/istio/pilot/pkg/model/status" 38 "istio.io/istio/pilot/pkg/networking/util" 39 "istio.io/istio/pkg/config" 40 "istio.io/istio/pkg/config/schema/gvk" 41 "istio.io/istio/pkg/kube/controllers" 42 istiolog "istio.io/istio/pkg/log" 43 "istio.io/istio/pkg/monitoring" 44 "istio.io/istio/pkg/queue" 45 ) 46 47 var ( 48 autoRegistrationSuccess = monitoring.NewSum( 49 "auto_registration_success_total", 50 "Total number of successful auto registrations.", 51 ) 52 53 autoRegistrationUpdates = monitoring.NewSum( 54 "auto_registration_updates_total", 55 "Total number of auto registration updates.", 56 ) 57 58 autoRegistrationUnregistrations = monitoring.NewSum( 59 "auto_registration_unregister_total", 60 "Total number of unregistrations.", 61 ) 62 63 autoRegistrationDeletes = monitoring.NewSum( 64 "auto_registration_deletes_total", 65 "Total number of auto registration cleaned up by periodic timer.", 66 ) 67 68 autoRegistrationErrors = monitoring.NewSum( 69 "auto_registration_errors_total", 70 "Total number of auto registration errors.", 71 ) 72 ) 73 74 const ( 75 timeFormat = time.RFC3339Nano 76 // maxRetries is the number of times a service will be retried before it is dropped out of the queue. 77 // With the current rate-limiter in use (5ms*2^(maxRetries-1)) the following numbers represent the 78 // sequence of delays between successive queuings of a service. 79 // 80 // 5ms, 10ms, 20ms, 40ms, 80ms 81 maxRetries = 5 82 ) 83 84 var log = istiolog.RegisterScope("wle", "wle controller debugging") 85 86 type Controller struct { 87 instanceID string 88 // TODO move WorkloadEntry related tasks into their own object and give InternalGen a reference. 89 // store should either be k8s (for running pilot) or in-memory (for tests). MCP and other config store implementations 90 // do not support writing. We only use it here for reading WorkloadEntry/WorkloadGroup. 91 store model.ConfigStoreController 92 93 // Note: unregister is to update the workload entry status: like setting `istio.io/disconnectedAt` 94 // and make the workload entry enqueue `cleanupQueue` 95 // cleanup is to delete the workload entry 96 97 // queue contains workloadEntry that need to be unregistered 98 queue controllers.Queue 99 // cleanupLimit rate limit's auto registered WorkloadEntry cleanup calls to k8s 100 cleanupLimit *rate.Limiter 101 // cleanupQueue delays the cleanup of auto registered WorkloadEntries to allow for grace period 102 cleanupQueue queue.Delayed 103 104 adsConnections *adsConnections 105 lateRegistrationQueue controllers.Queue 106 107 // maxConnectionAge is a duration that workload entry should be cleaned up if it does not reconnects. 108 maxConnectionAge time.Duration 109 110 stateStore *state.Store 111 healthController *health.Controller 112 } 113 114 type HealthEvent = health.HealthEvent 115 116 // NewController create a controller which manages workload lifecycle and health status. 117 func NewController(store model.ConfigStoreController, instanceID string, maxConnAge time.Duration) *Controller { 118 if !features.WorkloadEntryAutoRegistration && !features.WorkloadEntryHealthChecks { 119 return nil 120 } 121 122 if maxConnAge != math.MaxInt64 { 123 maxConnAge += maxConnAge / 2 124 // if overflow, set it to max int64 125 if maxConnAge < 0 { 126 maxConnAge = time.Duration(math.MaxInt64) 127 } 128 } 129 c := &Controller{ 130 instanceID: instanceID, 131 store: store, 132 cleanupLimit: rate.NewLimiter(rate.Limit(20), 1), 133 cleanupQueue: queue.NewDelayed(), 134 adsConnections: newAdsConnections(), 135 maxConnectionAge: maxConnAge, 136 } 137 c.queue = controllers.NewQueue("unregister_workloadentry", 138 controllers.WithMaxAttempts(maxRetries), 139 controllers.WithGenericReconciler(c.unregisterWorkload)) 140 c.stateStore = state.NewStore(store, c) 141 c.healthController = health.NewController(c.stateStore, maxRetries) 142 c.setupAutoRecreate() 143 return c 144 } 145 146 func (c *Controller) Run(stop <-chan struct{}) { 147 if c == nil { 148 return 149 } 150 if c.store != nil && c.cleanupQueue != nil { 151 go c.periodicWorkloadEntryCleanup(stop) 152 go c.cleanupQueue.Run(stop) 153 } 154 if features.WorkloadEntryAutoRegistration { 155 go c.lateRegistrationQueue.Run(stop) 156 } 157 158 go c.queue.Run(stop) 159 go c.healthController.Run(stop) 160 <-stop 161 } 162 163 // workItem contains the state of a "disconnect" event used to unregister a workload. 164 type workItem struct { 165 entryName string 166 autoCreated bool 167 proxy *model.Proxy 168 disConTime time.Time 169 origConTime time.Time 170 } 171 172 // setupAutoRecreate adds a handler to create entries for existing connections when a WG is added 173 func (c *Controller) setupAutoRecreate() { 174 if !features.WorkloadEntryAutoRegistration { 175 return 176 } 177 c.lateRegistrationQueue = controllers.NewQueue("auto-register existing connections", 178 controllers.WithReconciler(func(key kubetypes.NamespacedName) error { 179 log.Debugf("(%s) processing WorkloadGroup add for %s/%s", c.instanceID, key.Namespace, key.Name) 180 // WorkloadGroup doesn't exist anymore, skip this. 181 if c.store.Get(gvk.WorkloadGroup, key.Name, key.Namespace) == nil { 182 return nil 183 } 184 conns := c.adsConnections.ConnectionsForGroup(key) 185 for _, conn := range conns { 186 proxy := conn.Proxy() 187 entryName := autoregisteredWorkloadEntryName(proxy) 188 if entryName == "" { 189 continue 190 } 191 if err := c.registerWorkload(entryName, proxy, conn.ConnectedAt()); err != nil { 192 log.Error(err) 193 } 194 proxy.SetWorkloadEntry(entryName, true) 195 } 196 return nil 197 })) 198 199 c.store.RegisterEventHandler(gvk.WorkloadGroup, func(_ config.Config, cfg config.Config, event model.Event) { 200 if event == model.EventAdd { 201 c.lateRegistrationQueue.Add(cfg.NamespacedName()) 202 } 203 }) 204 } 205 206 func setConnectMeta(c *config.Config, controller string, conTime time.Time) { 207 if c.Annotations == nil { 208 c.Annotations = map[string]string{} 209 } 210 c.Annotations[annotation.IoIstioWorkloadController.Name] = controller 211 c.Annotations[annotation.IoIstioConnectedAt.Name] = conTime.Format(timeFormat) 212 delete(c.Annotations, annotation.IoIstioDisconnectedAt.Name) 213 } 214 215 // OnConnect determines whether a connecting proxy represents a non-Kubernetes 216 // workload and, if that's the case, initiates special processing required for that type 217 // of workloads, such as auto-registration, health status updates, etc. 218 // 219 // If connecting proxy represents a workload that is using auto-registration, it will 220 // create a WorkloadEntry resource automatically and be ready to receive health status 221 // updates. 222 // 223 // If connecting proxy represents a workload that is not using auto-registration, 224 // the WorkloadEntry resource is expected to exist beforehand. Otherwise, no special 225 // processing will be initiated, e.g. health status updates will be ignored. 226 func (c *Controller) OnConnect(conn connection) error { 227 if c == nil { 228 return nil 229 } 230 proxy := conn.Proxy() 231 var entryName string 232 var autoCreate bool 233 if features.WorkloadEntryAutoRegistration && proxy.Metadata.AutoRegisterGroup != "" { 234 entryName = autoregisteredWorkloadEntryName(proxy) 235 autoCreate = true 236 } else if features.WorkloadEntryHealthChecks && proxy.Metadata.WorkloadEntry != "" { 237 // a non-empty value of the `WorkloadEntry` field indicates that proxy must correspond to the WorkloadEntry 238 wle := c.store.Get(gvk.WorkloadEntry, proxy.Metadata.WorkloadEntry, proxy.Metadata.Namespace) 239 if wle == nil { 240 // either invalid proxy configuration or config propagation delay 241 return fmt.Errorf("proxy metadata indicates that it must correspond to an existing WorkloadEntry, "+ 242 "however WorkloadEntry %s/%s is not found", proxy.Metadata.Namespace, proxy.Metadata.WorkloadEntry) 243 } 244 if health.IsEligibleForHealthStatusUpdates(wle) { 245 if err := ensureProxyCanControlEntry(proxy, wle); err != nil { 246 return err 247 } 248 entryName = wle.Name 249 } 250 } 251 if entryName == "" { 252 return nil 253 } 254 255 proxy.SetWorkloadEntry(entryName, autoCreate) 256 c.adsConnections.Connect(conn) 257 258 err := c.onWorkloadConnect(entryName, proxy, conn.ConnectedAt(), autoCreate) 259 if err != nil { 260 log.Error(err) 261 } 262 return err 263 } 264 265 // ensureProxyCanControlEntry ensures the connected proxy's identity matches that of the WorkloadEntry it is associating with. 266 func ensureProxyCanControlEntry(proxy *model.Proxy, wle *config.Config) error { 267 if !features.ValidateWorkloadEntryIdentity { 268 // Validation disabled, skip 269 return nil 270 } 271 if proxy.VerifiedIdentity == nil { 272 return fmt.Errorf("registration of WorkloadEntry requires a verified identity") 273 } 274 if proxy.VerifiedIdentity.Namespace != wle.Namespace { 275 return fmt.Errorf("registration of WorkloadEntry namespace mismatch: %q vs %q", proxy.VerifiedIdentity.Namespace, wle.Namespace) 276 } 277 spec := wle.Spec.(*v1alpha3.WorkloadEntry) 278 if spec.ServiceAccount != "" && proxy.VerifiedIdentity.ServiceAccount != spec.ServiceAccount { 279 return fmt.Errorf("registration of WorkloadEntry service account mismatch: %q vs %q", proxy.VerifiedIdentity.ServiceAccount, spec.ServiceAccount) 280 } 281 return nil 282 } 283 284 // onWorkloadConnect creates/updates WorkloadEntry of the connecting workload. 285 // 286 // If workload is using auto-registration, WorkloadEntry will be created automatically. 287 // 288 // If workload is not using auto-registration, WorkloadEntry must already exist. 289 func (c *Controller) onWorkloadConnect(entryName string, proxy *model.Proxy, conTime time.Time, autoCreate bool) error { 290 if autoCreate { 291 return c.registerWorkload(entryName, proxy, conTime) 292 } 293 return c.becomeControllerOf(entryName, proxy, conTime) 294 } 295 296 // becomeControllerOf updates an existing WorkloadEntry of a workload that is not using 297 // auto-registration. 298 func (c *Controller) becomeControllerOf(entryName string, proxy *model.Proxy, conTime time.Time) error { 299 changed, err := c.changeWorkloadEntryStateToConnected(entryName, proxy, conTime) 300 if err != nil { 301 return err 302 } 303 if !changed { 304 return nil 305 } 306 log.Infof("updated health-checked WorkloadEntry %s/%s", proxy.Metadata.Namespace, entryName) 307 return nil 308 } 309 310 // registerWorkload creates or updates a WorkloadEntry of a workload that is using 311 // auto-registration. 312 func (c *Controller) registerWorkload(entryName string, proxy *model.Proxy, conTime time.Time) error { 313 wle := c.store.Get(gvk.WorkloadEntry, entryName, proxy.Metadata.Namespace) 314 if wle != nil { 315 if err := ensureProxyCanControlEntry(proxy, wle); err != nil { 316 return err 317 } 318 changed, err := c.changeWorkloadEntryStateToConnected(entryName, proxy, conTime) 319 if err != nil { 320 autoRegistrationErrors.Increment() 321 return err 322 } 323 if !changed { 324 return nil 325 } 326 autoRegistrationUpdates.Increment() 327 log.Infof("updated auto-registered WorkloadEntry %s/%s as connected", proxy.Metadata.Namespace, entryName) 328 return nil 329 } 330 331 // No WorkloadEntry, create one using fields from the associated WorkloadGroup 332 groupCfg := c.store.Get(gvk.WorkloadGroup, proxy.Metadata.AutoRegisterGroup, proxy.Metadata.Namespace) 333 if groupCfg == nil { 334 autoRegistrationErrors.Increment() 335 return grpcstatus.Errorf(codes.FailedPrecondition, "auto-registration WorkloadEntry of %v failed: cannot find WorkloadGroup %s/%s", 336 proxy.ID, proxy.Metadata.Namespace, proxy.Metadata.AutoRegisterGroup) 337 } 338 entry := workloadEntryFromGroup(entryName, proxy, groupCfg) 339 if err := ensureProxyCanControlEntry(proxy, entry); err != nil { 340 return err 341 } 342 setConnectMeta(entry, c.instanceID, conTime) 343 _, err := c.store.Create(*entry) 344 if err != nil { 345 autoRegistrationErrors.Increment() 346 return fmt.Errorf("auto-registration WorkloadEntry of %v failed: error creating WorkloadEntry: %v", proxy.ID, err) 347 } 348 hcMessage := "" 349 if health.IsEligibleForHealthStatusUpdates(entry) { 350 hcMessage = " with health checking enabled" 351 } 352 autoRegistrationSuccess.Increment() 353 log.Infof("auto-registered WorkloadEntry %s/%s%s", proxy.Metadata.Namespace, entryName, hcMessage) 354 return nil 355 } 356 357 // changeWorkloadEntryStateToConnected updates given WorkloadEntry to reflect that 358 // it is now connected to this particular `istiod` instance. 359 func (c *Controller) changeWorkloadEntryStateToConnected(entryName string, proxy *model.Proxy, conTime time.Time) (bool, error) { 360 wle := c.store.Get(gvk.WorkloadEntry, entryName, proxy.Metadata.Namespace) 361 if wle == nil { 362 return false, fmt.Errorf("failed updating WorkloadEntry %s/%s: WorkloadEntry not found", proxy.Metadata.Namespace, entryName) 363 } 364 365 // check if this was actually disconnected AFTER this connTime 366 // this check can miss, but when it does the `Update` will fail due to versioning 367 // and retry. The retry includes this check and passes the next time. 368 if timestamp, ok := wle.Annotations[annotation.IoIstioDisconnectedAt.Name]; ok { 369 disconnTime, _ := time.Parse(timeFormat, timestamp) 370 if conTime.Before(disconnTime) { 371 // we slowly processed a connect and disconnected before getting to this point 372 return false, nil 373 } 374 } 375 376 lastConTime, _ := time.Parse(timeFormat, wle.Annotations[annotation.IoIstioConnectedAt.Name]) 377 // the proxy has reconnected to another pilot, not belong to this one. 378 if conTime.Before(lastConTime) { 379 return false, nil 380 } 381 // Try to update, if it fails we retry all the above logic since the WLE changed 382 updated := wle.DeepCopy() 383 setConnectMeta(&updated, c.instanceID, conTime) 384 _, err := c.store.Update(updated) 385 if err != nil { 386 return false, fmt.Errorf("failed updating WorkloadEntry %s/%s err: %v", proxy.Metadata.Namespace, entryName, err) 387 } 388 return true, nil 389 } 390 391 // changeWorkloadEntryStateToDisconnected updates given WorkloadEntry to reflect that 392 // it is no longer connected to this particular `istiod` instance. 393 func (c *Controller) changeWorkloadEntryStateToDisconnected(entryName string, proxy *model.Proxy, disconTime, origConnTime time.Time) (bool, error) { 394 // unset controller, set disconnect time 395 cfg := c.store.Get(gvk.WorkloadEntry, entryName, proxy.Metadata.Namespace) 396 if cfg == nil { 397 log.Infof("workloadentry %s/%s is not found, maybe deleted or because of propagate latency", 398 proxy.Metadata.Namespace, entryName) 399 // return error and backoff retry to prevent workloadentry leak 400 return false, fmt.Errorf("workloadentry %s/%s is not found", proxy.Metadata.Namespace, entryName) 401 } 402 403 // only queue a delete if this disconnect event is associated with the last connect event written to the workload entry 404 if mostRecentConn, err := time.Parse(timeFormat, cfg.Annotations[annotation.IoIstioConnectedAt.Name]); err == nil { 405 if mostRecentConn.After(origConnTime) { 406 // this disconnect event wasn't processed until after we successfully reconnected 407 return false, nil 408 } 409 } 410 // The wle has reconnected to another istiod and controlled by it. 411 if cfg.Annotations[annotation.IoIstioWorkloadController.Name] != c.instanceID { 412 return false, nil 413 } 414 415 conTime, _ := time.Parse(timeFormat, cfg.Annotations[annotation.IoIstioConnectedAt.Name]) 416 // The wle has reconnected to this istiod, 417 // this may happen when the unregister fails retry 418 if disconTime.Before(conTime) { 419 return false, nil 420 } 421 422 wle := cfg.DeepCopy() 423 delete(wle.Annotations, annotation.IoIstioConnectedAt.Name) 424 wle.Annotations[annotation.IoIstioDisconnectedAt.Name] = disconTime.Format(timeFormat) 425 // use update instead of patch to prevent race condition 426 _, err := c.store.Update(wle) 427 if err != nil { 428 return false, fmt.Errorf("disconnect: failed updating WorkloadEntry %s/%s: %v", proxy.Metadata.Namespace, entryName, err) 429 } 430 return true, nil 431 } 432 433 // OnDisconnect determines whether a connected proxy represents a non-Kubernetes 434 // workload and, if that's the case, terminates special processing required for that type 435 // of workloads, such as auto-registration, health status updates, etc. 436 // 437 // If proxy represents a workload (be it auto-registered or not), WorkloadEntry resource 438 // will be updated to reflect that the proxy is no longer connected to this particular `istiod` 439 // instance. 440 // 441 // Besides that, if proxy represents a workload that is using auto-registration, WorkloadEntry 442 // resource will be scheduled for removal if the proxy does not reconnect within a grace period. 443 // 444 // If proxy represents a workload that is not using auto-registration, WorkloadEntry resource 445 // will be scheduled to be marked unhealthy if the proxy does not reconnect within a grace period. 446 func (c *Controller) OnDisconnect(conn connection) { 447 if c == nil { 448 return 449 } 450 if !features.WorkloadEntryAutoRegistration && !features.WorkloadEntryHealthChecks { 451 return 452 } 453 proxy := conn.Proxy() 454 // check if the WE already exists, update the status 455 entryName, autoCreate := proxy.WorkloadEntry() 456 if entryName == "" { 457 return 458 } 459 460 // if there is still an ads connection, do not unregister. 461 if remainingConnections := c.adsConnections.Disconnect(conn); remainingConnections { 462 return 463 } 464 465 proxy.RLock() 466 defer proxy.RUnlock() 467 workload := &workItem{ 468 entryName: entryName, 469 autoCreated: autoCreate, 470 proxy: conn.Proxy(), 471 disConTime: time.Now(), 472 origConTime: conn.ConnectedAt(), 473 } 474 // queue has max retry itself 475 c.queue.Add(workload) 476 } 477 478 func (c *Controller) unregisterWorkload(item any) error { 479 workItem, ok := item.(*workItem) 480 if !ok { 481 return nil 482 } 483 484 changed, err := c.changeWorkloadEntryStateToDisconnected(workItem.entryName, workItem.proxy, workItem.disConTime, workItem.origConTime) 485 if err != nil { 486 autoRegistrationErrors.Increment() 487 return err 488 } 489 if !changed { 490 return nil 491 } 492 log.Infof("updated auto-registered WorkloadEntry %s/%s as disconnected", workItem.proxy.Metadata.Namespace, workItem.entryName) 493 494 if workItem.autoCreated { 495 autoRegistrationUnregistrations.Increment() 496 } 497 498 // after grace period, check if the workload ever reconnected 499 ns := workItem.proxy.Metadata.Namespace 500 c.cleanupQueue.PushDelayed(func() error { 501 wle := c.store.Get(gvk.WorkloadEntry, workItem.entryName, ns) 502 if wle == nil { 503 return nil 504 } 505 if c.shouldCleanupEntry(*wle) { 506 c.cleanupEntry(*wle, false) 507 } 508 return nil 509 }, features.WorkloadEntryCleanupGracePeriod) 510 return nil 511 } 512 513 // QueueWorkloadEntryHealth enqueues the associated WorkloadEntries health status. 514 func (c *Controller) QueueWorkloadEntryHealth(proxy *model.Proxy, event HealthEvent) { 515 if !features.WorkloadEntryHealthChecks { 516 return 517 } 518 c.healthController.QueueWorkloadEntryHealth(proxy, event) 519 } 520 521 // periodicWorkloadEntryCleanup checks lists all WorkloadEntry 522 func (c *Controller) periodicWorkloadEntryCleanup(stopCh <-chan struct{}) { 523 if !features.WorkloadEntryAutoRegistration && !features.WorkloadEntryHealthChecks { 524 return 525 } 526 ticker := time.NewTicker(10 * features.WorkloadEntryCleanupGracePeriod) 527 defer ticker.Stop() 528 for { 529 select { 530 case <-ticker.C: 531 wles := c.store.List(gvk.WorkloadEntry, metav1.NamespaceAll) 532 for _, wle := range wles { 533 wle := wle 534 if c.shouldCleanupEntry(wle) { 535 c.cleanupQueue.Push(func() error { 536 c.cleanupEntry(wle, true) 537 return nil 538 }) 539 } 540 } 541 case <-stopCh: 542 return 543 } 544 } 545 } 546 547 func (c *Controller) shouldCleanupEntry(wle config.Config) bool { 548 // don't clean up if WorkloadEntry is neither auto-registered 549 // nor health-checked 550 if !isAutoRegisteredWorkloadEntry(&wle) && 551 !(isHealthCheckedWorkloadEntry(&wle) && health.HasHealthCondition(&wle)) { 552 return false 553 } 554 555 // If there is `istio.io/connectedAt` set, don't cleanup this workload entry. 556 // This may happen when the workload fast reconnects to the same istiod. 557 // 1. disconnect: the workload entry has been updated 558 // 2. connect: but the patch is based on the old workloadentry because of the propagation latency. 559 // So in this case the `istio.io/disconnectedAt` is still there and the cleanup procedure will go on. 560 connTime := wle.Annotations[annotation.IoIstioConnectedAt.Name] 561 if connTime != "" { 562 // handle workload leak when both workload/pilot down at the same time before pilot has a chance to set disconnTime 563 connAt, err := time.Parse(timeFormat, connTime) 564 if err == nil && uint64(time.Since(connAt)) > uint64(c.maxConnectionAge) { 565 return true 566 } 567 return false 568 } 569 570 disconnTime := wle.Annotations[annotation.IoIstioDisconnectedAt.Name] 571 if disconnTime == "" { 572 return false 573 } 574 575 disconnAt, err := time.Parse(timeFormat, disconnTime) 576 // if we haven't passed the grace period, don't cleanup 577 if err == nil && time.Since(disconnAt) < features.WorkloadEntryCleanupGracePeriod { 578 return false 579 } 580 581 return true 582 } 583 584 // cleanupEntry performs clean-up actions on a WorkloadEntry of a proxy that hasn't 585 // reconnected within a grace period. 586 func (c *Controller) cleanupEntry(wle config.Config, periodic bool) { 587 if err := c.cleanupLimit.Wait(context.TODO()); err != nil { 588 log.Errorf("error in WorkloadEntry cleanup rate limiter: %v", err) 589 return 590 } 591 if isAutoRegisteredWorkloadEntry(&wle) { 592 c.deleteEntry(wle, periodic) 593 return 594 } 595 if isHealthCheckedWorkloadEntry(&wle) && health.HasHealthCondition(&wle) { 596 c.deleteHealthCondition(wle, periodic) 597 return 598 } 599 } 600 601 // deleteEntry removes WorkloadEntry that was created automatically for a workload 602 // that is using auto-registration. 603 func (c *Controller) deleteEntry(wle config.Config, periodic bool) { 604 if err := c.store.Delete(gvk.WorkloadEntry, wle.Name, wle.Namespace, &wle.ResourceVersion); err != nil && !errors.IsNotFound(err) { 605 log.Warnf("failed cleaning up auto-registered WorkloadEntry %s/%s: %v", wle.Namespace, wle.Name, err) 606 autoRegistrationErrors.Increment() 607 return 608 } 609 autoRegistrationDeletes.Increment() 610 log.Infof("cleaned up auto-registered WorkloadEntry %s/%s periodic:%v", wle.Namespace, wle.Name, periodic) 611 } 612 613 // deleteHealthCondition updates WorkloadEntry of a workload that is not using auto-registration 614 // to remove information about the health status (since we can no longer be certain about it). 615 func (c *Controller) deleteHealthCondition(wle config.Config, periodic bool) { 616 err := c.stateStore.DeleteHealthCondition(wle) 617 if err != nil { 618 log.Warnf("failed cleaning up health-checked WorkloadEntry %s/%s: %v", wle.Namespace, wle.Name, err) 619 return 620 } 621 log.Infof("cleaned up health-checked WorkloadEntry %s/%s periodic:%v", wle.Namespace, wle.Name, periodic) 622 } 623 624 // IsControllerOf implements state.StoreCallbacks. 625 func (c *Controller) IsControllerOf(wle *config.Config) bool { 626 if wle == nil { 627 return false 628 } 629 return wle.Annotations[annotation.IoIstioWorkloadController.Name] == c.instanceID 630 } 631 632 func autoregisteredWorkloadEntryName(proxy *model.Proxy) string { 633 if proxy.Metadata.AutoRegisterGroup == "" { 634 return "" 635 } 636 if len(proxy.IPAddresses) == 0 { 637 log.Errorf("auto-registration of %v failed: missing IP addresses", proxy.ID) 638 return "" 639 } 640 if len(proxy.Metadata.Namespace) == 0 { 641 log.Errorf("auto-registration of %v failed: missing namespace", proxy.ID) 642 return "" 643 } 644 p := []string{proxy.Metadata.AutoRegisterGroup, sanitizeIP(proxy.IPAddresses[0])} 645 if proxy.Metadata.Network != "" { 646 p = append(p, string(proxy.Metadata.Network)) 647 } 648 649 name := strings.Join(p, "-") 650 if len(name) > 253 { 651 name = name[len(name)-253:] 652 log.Warnf("generated WorkloadEntry name is too long, consider making the WorkloadGroup name shorter. Shortening from beginning to: %s", name) 653 } 654 return name 655 } 656 657 // sanitizeIP ensures an IP address (IPv6) can be used in Kubernetes resource name 658 func sanitizeIP(s string) string { 659 return strings.ReplaceAll(s, ":", "-") 660 } 661 662 func mergeLabels(labels ...map[string]string) map[string]string { 663 if len(labels) == 0 { 664 return map[string]string{} 665 } 666 out := make(map[string]string, len(labels)*len(labels[0])) 667 for _, lm := range labels { 668 for k, v := range lm { 669 out[k] = v 670 } 671 } 672 return out 673 } 674 675 var workloadGroupIsController = true 676 677 func workloadEntryFromGroup(name string, proxy *model.Proxy, groupCfg *config.Config) *config.Config { 678 group := groupCfg.Spec.(*v1alpha3.WorkloadGroup) 679 entry := group.Template.DeepCopy() 680 entry.Address = proxy.IPAddresses[0] 681 // TODO move labels out of entry 682 // node metadata > WorkloadGroup.Metadata > WorkloadGroup.Template 683 if group.Metadata != nil && group.Metadata.Labels != nil { 684 entry.Labels = mergeLabels(entry.Labels, group.Metadata.Labels) 685 } 686 // Explicitly do not use proxy.Labels, as it is only initialized *after* we register the workload, 687 // and it would be circular, as it will set the labels based on the WorkloadEntry -- but we are creating 688 // the workload entry. 689 if proxy.Metadata.Labels != nil { 690 entry.Labels = mergeLabels(entry.Labels, proxy.Metadata.Labels) 691 // the label has been converted to "istio-locality: region/zone/subzone" 692 // in pilot/pkg/xds/ads.go, and `/` is not allowed in k8s label value. 693 // Instead of converting again, we delete it since has set WorkloadEntry.Locality 694 delete(entry.Labels, model.LocalityLabel) 695 } 696 697 annotations := map[string]string{annotation.IoIstioAutoRegistrationGroup.Name: groupCfg.Name} 698 if group.Metadata != nil && group.Metadata.Annotations != nil { 699 annotations = mergeLabels(annotations, group.Metadata.Annotations) 700 } 701 702 if proxy.Metadata.Network != "" { 703 entry.Network = string(proxy.Metadata.Network) 704 } 705 if proxy.Locality != nil { 706 entry.Locality = util.LocalityToString(proxy.Locality) 707 } 708 if proxy.Metadata.ProxyConfig != nil && proxy.Metadata.ProxyConfig.ReadinessProbe != nil { 709 annotations[status.WorkloadEntryHealthCheckAnnotation] = "true" 710 } 711 return &config.Config{ 712 Meta: config.Meta{ 713 GroupVersionKind: gvk.WorkloadEntry, 714 Name: name, 715 Namespace: proxy.Metadata.Namespace, 716 Labels: entry.Labels, 717 Annotations: annotations, 718 OwnerReferences: []metav1.OwnerReference{{ 719 APIVersion: groupCfg.GroupVersionKind.GroupVersion(), 720 Kind: groupCfg.GroupVersionKind.Kind, 721 Name: groupCfg.Name, 722 UID: kubetypes.UID(groupCfg.UID), 723 Controller: &workloadGroupIsController, 724 }}, 725 }, 726 Spec: entry, 727 // TODO status fields used for garbage collection 728 Status: nil, 729 } 730 } 731 732 func isAutoRegisteredWorkloadEntry(wle *config.Config) bool { 733 return wle != nil && wle.Annotations[annotation.IoIstioAutoRegistrationGroup.Name] != "" 734 } 735 736 func isHealthCheckedWorkloadEntry(wle *config.Config) bool { 737 return wle != nil && wle.Annotations[annotation.IoIstioWorkloadController.Name] != "" && !isAutoRegisteredWorkloadEntry(wle) 738 }