istio.io/istio@v0.0.0-20240520182934-d79c90f27776/pilot/pkg/autoregistration/controller.go (about)

     1  // Copyright Istio Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package autoregistration
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"math"
    21  	"strings"
    22  	"time"
    23  
    24  	"golang.org/x/time/rate"
    25  	"google.golang.org/grpc/codes"
    26  	grpcstatus "google.golang.org/grpc/status"
    27  	"k8s.io/apimachinery/pkg/api/errors"
    28  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    29  	kubetypes "k8s.io/apimachinery/pkg/types"
    30  
    31  	"istio.io/api/annotation"
    32  	"istio.io/api/networking/v1alpha3"
    33  	"istio.io/istio/pilot/pkg/autoregistration/internal/health"
    34  	"istio.io/istio/pilot/pkg/autoregistration/internal/state"
    35  	"istio.io/istio/pilot/pkg/features"
    36  	"istio.io/istio/pilot/pkg/model"
    37  	"istio.io/istio/pilot/pkg/model/status"
    38  	"istio.io/istio/pilot/pkg/networking/util"
    39  	"istio.io/istio/pkg/config"
    40  	"istio.io/istio/pkg/config/schema/gvk"
    41  	"istio.io/istio/pkg/kube/controllers"
    42  	istiolog "istio.io/istio/pkg/log"
    43  	"istio.io/istio/pkg/monitoring"
    44  	"istio.io/istio/pkg/queue"
    45  )
    46  
// Counters describing the lifecycle of WorkloadEntries managed by this controller.
var (
	// autoRegistrationSuccess is incremented by registerWorkload after a new
	// WorkloadEntry has been created in the store.
	autoRegistrationSuccess = monitoring.NewSum(
		"auto_registration_success_total",
		"Total number of successful auto registrations.",
	)

	// autoRegistrationUpdates is incremented by registerWorkload when an already
	// existing auto-registered WorkloadEntry is re-marked as connected.
	autoRegistrationUpdates = monitoring.NewSum(
		"auto_registration_updates_total",
		"Total number of auto registration updates.",
	)

	// autoRegistrationUnregistrations is incremented by unregisterWorkload when an
	// auto-created WorkloadEntry has been marked as disconnected.
	autoRegistrationUnregistrations = monitoring.NewSum(
		"auto_registration_unregister_total",
		"Total number of unregistrations.",
	)

	// autoRegistrationDeletes is incremented by deleteEntry, which is reached both
	// from the delayed post-disconnect cleanup and from the periodic cleanup.
	autoRegistrationDeletes = monitoring.NewSum(
		"auto_registration_deletes_total",
		"Total number of auto registration cleaned up by periodic timer.",
	)

	// autoRegistrationErrors is incremented on failed create/update/delete
	// operations performed by this controller.
	autoRegistrationErrors = monitoring.NewSum(
		"auto_registration_errors_total",
		"Total number of auto registration errors.",
	)
)
    73  
const (
	// timeFormat is the layout used to write and parse the connectedAt and
	// disconnectedAt annotation values on a WorkloadEntry.
	timeFormat = time.RFC3339Nano
	// maxRetries is the number of times an item will be retried before it is dropped out of the queue.
	// It is passed both to the unregister queue and to the health controller.
	// With the current rate-limiter in use (5ms*2^(maxRetries-1)) the following numbers represent the
	// sequence of delays between successive queuings of an item.
	// NOTE(review): the delay sequence depends on the rate limiter used by controllers.Queue,
	// which is not visible in this file - confirm.
	//
	// 5ms, 10ms, 20ms, 40ms, 80ms
	maxRetries = 5
)

// log is the logging scope ("wle") used by the WorkloadEntry controller.
var log = istiolog.RegisterScope("wle", "wle controller debugging")
    85  
// Controller manages the lifecycle of WorkloadEntries that belong to connected
// proxies: auto-registration on connect, marking entries as disconnected,
// delayed/periodic cleanup, and forwarding of health events.
//
// A nil *Controller is tolerated by Run, OnConnect and OnDisconnect; NewController
// returns nil when both auto-registration and health checks are disabled.
type Controller struct {
	// instanceID identifies this istiod instance. It is written into the workload
	// controller annotation of entries this instance manages and compared against
	// that annotation to decide ownership.
	instanceID string
	// TODO move WorkloadEntry related tasks into their own object and give InternalGen a reference.
	// store should either be k8s (for running pilot) or in-memory (for tests). MCP and other config store implementations
	// do not support writing. We only use it here for reading WorkloadEntry/WorkloadGroup.
	store model.ConfigStoreController

	// Note: "unregister" updates the workload entry status (e.g. sets `istio.io/disconnectedAt`)
	// and pushes the workload entry onto `cleanupQueue`;
	// "cleanup" deletes the workload entry.

	// queue contains the workItems of workload entries that need to be unregistered.
	queue controllers.Queue
	// cleanupLimit rate-limits auto-registered WorkloadEntry cleanup calls to k8s.
	cleanupLimit *rate.Limiter
	// cleanupQueue delays the cleanup of auto-registered WorkloadEntries to allow for a grace period.
	cleanupQueue queue.Delayed

	// adsConnections tracks the connections registered through OnConnect.
	// lateRegistrationQueue processes WorkloadGroup add events so that proxies which
	// connected before their group existed can still be registered; it is only
	// initialized when auto-registration is enabled.
	adsConnections        *adsConnections
	lateRegistrationQueue controllers.Queue

	// maxConnectionAge is the duration after which a workload entry that still carries a
	// connectedAt annotation is considered leaked and is cleaned up.
	maxConnectionAge time.Duration

	// stateStore and healthController handle health status updates of WorkloadEntries.
	stateStore       *state.Store
	healthController *health.Controller
}
   113  
// HealthEvent is an alias of health.HealthEvent, the event type accepted by
// QueueWorkloadEntryHealth. Presumably it is re-exported here because the health
// package lives under an internal/ path.
type HealthEvent = health.HealthEvent
   115  
   116  // NewController create a controller which manages workload lifecycle and health status.
   117  func NewController(store model.ConfigStoreController, instanceID string, maxConnAge time.Duration) *Controller {
   118  	if !features.WorkloadEntryAutoRegistration && !features.WorkloadEntryHealthChecks {
   119  		return nil
   120  	}
   121  
   122  	if maxConnAge != math.MaxInt64 {
   123  		maxConnAge += maxConnAge / 2
   124  		// if overflow, set it to max int64
   125  		if maxConnAge < 0 {
   126  			maxConnAge = time.Duration(math.MaxInt64)
   127  		}
   128  	}
   129  	c := &Controller{
   130  		instanceID:       instanceID,
   131  		store:            store,
   132  		cleanupLimit:     rate.NewLimiter(rate.Limit(20), 1),
   133  		cleanupQueue:     queue.NewDelayed(),
   134  		adsConnections:   newAdsConnections(),
   135  		maxConnectionAge: maxConnAge,
   136  	}
   137  	c.queue = controllers.NewQueue("unregister_workloadentry",
   138  		controllers.WithMaxAttempts(maxRetries),
   139  		controllers.WithGenericReconciler(c.unregisterWorkload))
   140  	c.stateStore = state.NewStore(store, c)
   141  	c.healthController = health.NewController(c.stateStore, maxRetries)
   142  	c.setupAutoRecreate()
   143  	return c
   144  }
   145  
   146  func (c *Controller) Run(stop <-chan struct{}) {
   147  	if c == nil {
   148  		return
   149  	}
   150  	if c.store != nil && c.cleanupQueue != nil {
   151  		go c.periodicWorkloadEntryCleanup(stop)
   152  		go c.cleanupQueue.Run(stop)
   153  	}
   154  	if features.WorkloadEntryAutoRegistration {
   155  		go c.lateRegistrationQueue.Run(stop)
   156  	}
   157  
   158  	go c.queue.Run(stop)
   159  	go c.healthController.Run(stop)
   160  	<-stop
   161  }
   162  
// workItem contains the state of a "disconnect" event used to unregister a workload.
// Items are created by OnDisconnect and consumed by unregisterWorkload.
type workItem struct {
	entryName   string       // name of the WorkloadEntry associated with the proxy
	autoCreated bool         // whether the entry was auto-registered
	proxy       *model.Proxy // the disconnected proxy
	disConTime  time.Time    // when the disconnect was observed
	origConTime time.Time    // when the now-closed connection was established
}
   171  
   172  // setupAutoRecreate adds a handler to create entries for existing connections when a WG is added
   173  func (c *Controller) setupAutoRecreate() {
   174  	if !features.WorkloadEntryAutoRegistration {
   175  		return
   176  	}
   177  	c.lateRegistrationQueue = controllers.NewQueue("auto-register existing connections",
   178  		controllers.WithReconciler(func(key kubetypes.NamespacedName) error {
   179  			log.Debugf("(%s) processing WorkloadGroup add for %s/%s", c.instanceID, key.Namespace, key.Name)
   180  			// WorkloadGroup doesn't exist anymore, skip this.
   181  			if c.store.Get(gvk.WorkloadGroup, key.Name, key.Namespace) == nil {
   182  				return nil
   183  			}
   184  			conns := c.adsConnections.ConnectionsForGroup(key)
   185  			for _, conn := range conns {
   186  				proxy := conn.Proxy()
   187  				entryName := autoregisteredWorkloadEntryName(proxy)
   188  				if entryName == "" {
   189  					continue
   190  				}
   191  				if err := c.registerWorkload(entryName, proxy, conn.ConnectedAt()); err != nil {
   192  					log.Error(err)
   193  				}
   194  				proxy.SetWorkloadEntry(entryName, true)
   195  			}
   196  			return nil
   197  		}))
   198  
   199  	c.store.RegisterEventHandler(gvk.WorkloadGroup, func(_ config.Config, cfg config.Config, event model.Event) {
   200  		if event == model.EventAdd {
   201  			c.lateRegistrationQueue.Add(cfg.NamespacedName())
   202  		}
   203  	})
   204  }
   205  
   206  func setConnectMeta(c *config.Config, controller string, conTime time.Time) {
   207  	if c.Annotations == nil {
   208  		c.Annotations = map[string]string{}
   209  	}
   210  	c.Annotations[annotation.IoIstioWorkloadController.Name] = controller
   211  	c.Annotations[annotation.IoIstioConnectedAt.Name] = conTime.Format(timeFormat)
   212  	delete(c.Annotations, annotation.IoIstioDisconnectedAt.Name)
   213  }
   214  
   215  // OnConnect determines whether a connecting proxy represents a non-Kubernetes
   216  // workload and, if that's the case, initiates special processing required for that type
   217  // of workloads, such as auto-registration, health status updates, etc.
   218  //
   219  // If connecting proxy represents a workload that is using auto-registration, it will
   220  // create a WorkloadEntry resource automatically and be ready to receive health status
   221  // updates.
   222  //
   223  // If connecting proxy represents a workload that is not using auto-registration,
   224  // the WorkloadEntry resource is expected to exist beforehand. Otherwise, no special
   225  // processing will be initiated, e.g. health status updates will be ignored.
   226  func (c *Controller) OnConnect(conn connection) error {
   227  	if c == nil {
   228  		return nil
   229  	}
   230  	proxy := conn.Proxy()
   231  	var entryName string
   232  	var autoCreate bool
   233  	if features.WorkloadEntryAutoRegistration && proxy.Metadata.AutoRegisterGroup != "" {
   234  		entryName = autoregisteredWorkloadEntryName(proxy)
   235  		autoCreate = true
   236  	} else if features.WorkloadEntryHealthChecks && proxy.Metadata.WorkloadEntry != "" {
   237  		// a non-empty value of the `WorkloadEntry` field indicates that proxy must correspond to the WorkloadEntry
   238  		wle := c.store.Get(gvk.WorkloadEntry, proxy.Metadata.WorkloadEntry, proxy.Metadata.Namespace)
   239  		if wle == nil {
   240  			// either invalid proxy configuration or config propagation delay
   241  			return fmt.Errorf("proxy metadata indicates that it must correspond to an existing WorkloadEntry, "+
   242  				"however WorkloadEntry %s/%s is not found", proxy.Metadata.Namespace, proxy.Metadata.WorkloadEntry)
   243  		}
   244  		if health.IsEligibleForHealthStatusUpdates(wle) {
   245  			if err := ensureProxyCanControlEntry(proxy, wle); err != nil {
   246  				return err
   247  			}
   248  			entryName = wle.Name
   249  		}
   250  	}
   251  	if entryName == "" {
   252  		return nil
   253  	}
   254  
   255  	proxy.SetWorkloadEntry(entryName, autoCreate)
   256  	c.adsConnections.Connect(conn)
   257  
   258  	err := c.onWorkloadConnect(entryName, proxy, conn.ConnectedAt(), autoCreate)
   259  	if err != nil {
   260  		log.Error(err)
   261  	}
   262  	return err
   263  }
   264  
   265  // ensureProxyCanControlEntry ensures the connected proxy's identity matches that of the WorkloadEntry it is associating with.
   266  func ensureProxyCanControlEntry(proxy *model.Proxy, wle *config.Config) error {
   267  	if !features.ValidateWorkloadEntryIdentity {
   268  		// Validation disabled, skip
   269  		return nil
   270  	}
   271  	if proxy.VerifiedIdentity == nil {
   272  		return fmt.Errorf("registration of WorkloadEntry requires a verified identity")
   273  	}
   274  	if proxy.VerifiedIdentity.Namespace != wle.Namespace {
   275  		return fmt.Errorf("registration of WorkloadEntry namespace mismatch: %q vs %q", proxy.VerifiedIdentity.Namespace, wle.Namespace)
   276  	}
   277  	spec := wle.Spec.(*v1alpha3.WorkloadEntry)
   278  	if spec.ServiceAccount != "" && proxy.VerifiedIdentity.ServiceAccount != spec.ServiceAccount {
   279  		return fmt.Errorf("registration of WorkloadEntry service account mismatch: %q vs %q", proxy.VerifiedIdentity.ServiceAccount, spec.ServiceAccount)
   280  	}
   281  	return nil
   282  }
   283  
   284  // onWorkloadConnect creates/updates WorkloadEntry of the connecting workload.
   285  //
   286  // If workload is using auto-registration, WorkloadEntry will be created automatically.
   287  //
   288  // If workload is not using auto-registration, WorkloadEntry must already exist.
   289  func (c *Controller) onWorkloadConnect(entryName string, proxy *model.Proxy, conTime time.Time, autoCreate bool) error {
   290  	if autoCreate {
   291  		return c.registerWorkload(entryName, proxy, conTime)
   292  	}
   293  	return c.becomeControllerOf(entryName, proxy, conTime)
   294  }
   295  
   296  // becomeControllerOf updates an existing WorkloadEntry of a workload that is not using
   297  // auto-registration.
   298  func (c *Controller) becomeControllerOf(entryName string, proxy *model.Proxy, conTime time.Time) error {
   299  	changed, err := c.changeWorkloadEntryStateToConnected(entryName, proxy, conTime)
   300  	if err != nil {
   301  		return err
   302  	}
   303  	if !changed {
   304  		return nil
   305  	}
   306  	log.Infof("updated health-checked WorkloadEntry %s/%s", proxy.Metadata.Namespace, entryName)
   307  	return nil
   308  }
   309  
   310  // registerWorkload creates or updates a WorkloadEntry of a workload that is using
   311  // auto-registration.
   312  func (c *Controller) registerWorkload(entryName string, proxy *model.Proxy, conTime time.Time) error {
   313  	wle := c.store.Get(gvk.WorkloadEntry, entryName, proxy.Metadata.Namespace)
   314  	if wle != nil {
   315  		if err := ensureProxyCanControlEntry(proxy, wle); err != nil {
   316  			return err
   317  		}
   318  		changed, err := c.changeWorkloadEntryStateToConnected(entryName, proxy, conTime)
   319  		if err != nil {
   320  			autoRegistrationErrors.Increment()
   321  			return err
   322  		}
   323  		if !changed {
   324  			return nil
   325  		}
   326  		autoRegistrationUpdates.Increment()
   327  		log.Infof("updated auto-registered WorkloadEntry %s/%s as connected", proxy.Metadata.Namespace, entryName)
   328  		return nil
   329  	}
   330  
   331  	// No WorkloadEntry, create one using fields from the associated WorkloadGroup
   332  	groupCfg := c.store.Get(gvk.WorkloadGroup, proxy.Metadata.AutoRegisterGroup, proxy.Metadata.Namespace)
   333  	if groupCfg == nil {
   334  		autoRegistrationErrors.Increment()
   335  		return grpcstatus.Errorf(codes.FailedPrecondition, "auto-registration WorkloadEntry of %v failed: cannot find WorkloadGroup %s/%s",
   336  			proxy.ID, proxy.Metadata.Namespace, proxy.Metadata.AutoRegisterGroup)
   337  	}
   338  	entry := workloadEntryFromGroup(entryName, proxy, groupCfg)
   339  	if err := ensureProxyCanControlEntry(proxy, entry); err != nil {
   340  		return err
   341  	}
   342  	setConnectMeta(entry, c.instanceID, conTime)
   343  	_, err := c.store.Create(*entry)
   344  	if err != nil {
   345  		autoRegistrationErrors.Increment()
   346  		return fmt.Errorf("auto-registration WorkloadEntry of %v failed: error creating WorkloadEntry: %v", proxy.ID, err)
   347  	}
   348  	hcMessage := ""
   349  	if health.IsEligibleForHealthStatusUpdates(entry) {
   350  		hcMessage = " with health checking enabled"
   351  	}
   352  	autoRegistrationSuccess.Increment()
   353  	log.Infof("auto-registered WorkloadEntry %s/%s%s", proxy.Metadata.Namespace, entryName, hcMessage)
   354  	return nil
   355  }
   356  
   357  // changeWorkloadEntryStateToConnected updates given WorkloadEntry to reflect that
   358  // it is now connected to this particular `istiod` instance.
   359  func (c *Controller) changeWorkloadEntryStateToConnected(entryName string, proxy *model.Proxy, conTime time.Time) (bool, error) {
   360  	wle := c.store.Get(gvk.WorkloadEntry, entryName, proxy.Metadata.Namespace)
   361  	if wle == nil {
   362  		return false, fmt.Errorf("failed updating WorkloadEntry %s/%s: WorkloadEntry not found", proxy.Metadata.Namespace, entryName)
   363  	}
   364  
   365  	// check if this was actually disconnected AFTER this connTime
   366  	// this check can miss, but when it does the `Update` will fail due to versioning
   367  	// and retry. The retry includes this check and passes the next time.
   368  	if timestamp, ok := wle.Annotations[annotation.IoIstioDisconnectedAt.Name]; ok {
   369  		disconnTime, _ := time.Parse(timeFormat, timestamp)
   370  		if conTime.Before(disconnTime) {
   371  			// we slowly processed a connect and disconnected before getting to this point
   372  			return false, nil
   373  		}
   374  	}
   375  
   376  	lastConTime, _ := time.Parse(timeFormat, wle.Annotations[annotation.IoIstioConnectedAt.Name])
   377  	// the proxy has reconnected to another pilot, not belong to this one.
   378  	if conTime.Before(lastConTime) {
   379  		return false, nil
   380  	}
   381  	// Try to update, if it fails we retry all the above logic since the WLE changed
   382  	updated := wle.DeepCopy()
   383  	setConnectMeta(&updated, c.instanceID, conTime)
   384  	_, err := c.store.Update(updated)
   385  	if err != nil {
   386  		return false, fmt.Errorf("failed updating WorkloadEntry %s/%s err: %v", proxy.Metadata.Namespace, entryName, err)
   387  	}
   388  	return true, nil
   389  }
   390  
   391  // changeWorkloadEntryStateToDisconnected updates given WorkloadEntry to reflect that
   392  // it is no longer connected to this particular `istiod` instance.
   393  func (c *Controller) changeWorkloadEntryStateToDisconnected(entryName string, proxy *model.Proxy, disconTime, origConnTime time.Time) (bool, error) {
   394  	// unset controller, set disconnect time
   395  	cfg := c.store.Get(gvk.WorkloadEntry, entryName, proxy.Metadata.Namespace)
   396  	if cfg == nil {
   397  		log.Infof("workloadentry %s/%s is not found, maybe deleted or because of propagate latency",
   398  			proxy.Metadata.Namespace, entryName)
   399  		// return error and backoff retry to prevent workloadentry leak
   400  		return false, fmt.Errorf("workloadentry %s/%s is not found", proxy.Metadata.Namespace, entryName)
   401  	}
   402  
   403  	// only queue a delete if this disconnect event is associated with the last connect event written to the workload entry
   404  	if mostRecentConn, err := time.Parse(timeFormat, cfg.Annotations[annotation.IoIstioConnectedAt.Name]); err == nil {
   405  		if mostRecentConn.After(origConnTime) {
   406  			// this disconnect event wasn't processed until after we successfully reconnected
   407  			return false, nil
   408  		}
   409  	}
   410  	// The wle has reconnected to another istiod and controlled by it.
   411  	if cfg.Annotations[annotation.IoIstioWorkloadController.Name] != c.instanceID {
   412  		return false, nil
   413  	}
   414  
   415  	conTime, _ := time.Parse(timeFormat, cfg.Annotations[annotation.IoIstioConnectedAt.Name])
   416  	// The wle has reconnected to this istiod,
   417  	// this may happen when the unregister fails retry
   418  	if disconTime.Before(conTime) {
   419  		return false, nil
   420  	}
   421  
   422  	wle := cfg.DeepCopy()
   423  	delete(wle.Annotations, annotation.IoIstioConnectedAt.Name)
   424  	wle.Annotations[annotation.IoIstioDisconnectedAt.Name] = disconTime.Format(timeFormat)
   425  	// use update instead of patch to prevent race condition
   426  	_, err := c.store.Update(wle)
   427  	if err != nil {
   428  		return false, fmt.Errorf("disconnect: failed updating WorkloadEntry %s/%s: %v", proxy.Metadata.Namespace, entryName, err)
   429  	}
   430  	return true, nil
   431  }
   432  
   433  // OnDisconnect determines whether a connected proxy represents a non-Kubernetes
   434  // workload and, if that's the case, terminates special processing required for that type
   435  // of workloads, such as auto-registration, health status updates, etc.
   436  //
   437  // If proxy represents a workload (be it auto-registered or not), WorkloadEntry resource
   438  // will be updated to reflect that the proxy is no longer connected to this particular `istiod`
   439  // instance.
   440  //
   441  // Besides that, if proxy represents a workload that is using auto-registration, WorkloadEntry
   442  // resource will be scheduled for removal if the proxy does not reconnect within a grace period.
   443  //
   444  // If proxy represents a workload that is not using auto-registration, WorkloadEntry resource
   445  // will be scheduled to be marked unhealthy if the proxy does not reconnect within a grace period.
   446  func (c *Controller) OnDisconnect(conn connection) {
   447  	if c == nil {
   448  		return
   449  	}
   450  	if !features.WorkloadEntryAutoRegistration && !features.WorkloadEntryHealthChecks {
   451  		return
   452  	}
   453  	proxy := conn.Proxy()
   454  	// check if the WE already exists, update the status
   455  	entryName, autoCreate := proxy.WorkloadEntry()
   456  	if entryName == "" {
   457  		return
   458  	}
   459  
   460  	// if there is still an ads connection, do not unregister.
   461  	if remainingConnections := c.adsConnections.Disconnect(conn); remainingConnections {
   462  		return
   463  	}
   464  
   465  	proxy.RLock()
   466  	defer proxy.RUnlock()
   467  	workload := &workItem{
   468  		entryName:   entryName,
   469  		autoCreated: autoCreate,
   470  		proxy:       conn.Proxy(),
   471  		disConTime:  time.Now(),
   472  		origConTime: conn.ConnectedAt(),
   473  	}
   474  	// queue has max retry itself
   475  	c.queue.Add(workload)
   476  }
   477  
   478  func (c *Controller) unregisterWorkload(item any) error {
   479  	workItem, ok := item.(*workItem)
   480  	if !ok {
   481  		return nil
   482  	}
   483  
   484  	changed, err := c.changeWorkloadEntryStateToDisconnected(workItem.entryName, workItem.proxy, workItem.disConTime, workItem.origConTime)
   485  	if err != nil {
   486  		autoRegistrationErrors.Increment()
   487  		return err
   488  	}
   489  	if !changed {
   490  		return nil
   491  	}
   492  	log.Infof("updated auto-registered WorkloadEntry %s/%s as disconnected", workItem.proxy.Metadata.Namespace, workItem.entryName)
   493  
   494  	if workItem.autoCreated {
   495  		autoRegistrationUnregistrations.Increment()
   496  	}
   497  
   498  	// after grace period, check if the workload ever reconnected
   499  	ns := workItem.proxy.Metadata.Namespace
   500  	c.cleanupQueue.PushDelayed(func() error {
   501  		wle := c.store.Get(gvk.WorkloadEntry, workItem.entryName, ns)
   502  		if wle == nil {
   503  			return nil
   504  		}
   505  		if c.shouldCleanupEntry(*wle) {
   506  			c.cleanupEntry(*wle, false)
   507  		}
   508  		return nil
   509  	}, features.WorkloadEntryCleanupGracePeriod)
   510  	return nil
   511  }
   512  
   513  // QueueWorkloadEntryHealth enqueues the associated WorkloadEntries health status.
   514  func (c *Controller) QueueWorkloadEntryHealth(proxy *model.Proxy, event HealthEvent) {
   515  	if !features.WorkloadEntryHealthChecks {
   516  		return
   517  	}
   518  	c.healthController.QueueWorkloadEntryHealth(proxy, event)
   519  }
   520  
   521  // periodicWorkloadEntryCleanup checks lists all WorkloadEntry
   522  func (c *Controller) periodicWorkloadEntryCleanup(stopCh <-chan struct{}) {
   523  	if !features.WorkloadEntryAutoRegistration && !features.WorkloadEntryHealthChecks {
   524  		return
   525  	}
   526  	ticker := time.NewTicker(10 * features.WorkloadEntryCleanupGracePeriod)
   527  	defer ticker.Stop()
   528  	for {
   529  		select {
   530  		case <-ticker.C:
   531  			wles := c.store.List(gvk.WorkloadEntry, metav1.NamespaceAll)
   532  			for _, wle := range wles {
   533  				wle := wle
   534  				if c.shouldCleanupEntry(wle) {
   535  					c.cleanupQueue.Push(func() error {
   536  						c.cleanupEntry(wle, true)
   537  						return nil
   538  					})
   539  				}
   540  			}
   541  		case <-stopCh:
   542  			return
   543  		}
   544  	}
   545  }
   546  
   547  func (c *Controller) shouldCleanupEntry(wle config.Config) bool {
   548  	// don't clean up if WorkloadEntry is neither auto-registered
   549  	// nor health-checked
   550  	if !isAutoRegisteredWorkloadEntry(&wle) &&
   551  		!(isHealthCheckedWorkloadEntry(&wle) && health.HasHealthCondition(&wle)) {
   552  		return false
   553  	}
   554  
   555  	// If there is `istio.io/connectedAt` set, don't cleanup this workload entry.
   556  	// This may happen when the workload fast reconnects to the same istiod.
   557  	// 1. disconnect: the workload entry has been updated
   558  	// 2. connect: but the patch is based on the old workloadentry because of the propagation latency.
   559  	// So in this case the `istio.io/disconnectedAt` is still there and the cleanup procedure will go on.
   560  	connTime := wle.Annotations[annotation.IoIstioConnectedAt.Name]
   561  	if connTime != "" {
   562  		// handle workload leak when both workload/pilot down at the same time before pilot has a chance to set disconnTime
   563  		connAt, err := time.Parse(timeFormat, connTime)
   564  		if err == nil && uint64(time.Since(connAt)) > uint64(c.maxConnectionAge) {
   565  			return true
   566  		}
   567  		return false
   568  	}
   569  
   570  	disconnTime := wle.Annotations[annotation.IoIstioDisconnectedAt.Name]
   571  	if disconnTime == "" {
   572  		return false
   573  	}
   574  
   575  	disconnAt, err := time.Parse(timeFormat, disconnTime)
   576  	// if we haven't passed the grace period, don't cleanup
   577  	if err == nil && time.Since(disconnAt) < features.WorkloadEntryCleanupGracePeriod {
   578  		return false
   579  	}
   580  
   581  	return true
   582  }
   583  
   584  // cleanupEntry performs clean-up actions on a WorkloadEntry of a proxy that hasn't
   585  // reconnected within a grace period.
   586  func (c *Controller) cleanupEntry(wle config.Config, periodic bool) {
   587  	if err := c.cleanupLimit.Wait(context.TODO()); err != nil {
   588  		log.Errorf("error in WorkloadEntry cleanup rate limiter: %v", err)
   589  		return
   590  	}
   591  	if isAutoRegisteredWorkloadEntry(&wle) {
   592  		c.deleteEntry(wle, periodic)
   593  		return
   594  	}
   595  	if isHealthCheckedWorkloadEntry(&wle) && health.HasHealthCondition(&wle) {
   596  		c.deleteHealthCondition(wle, periodic)
   597  		return
   598  	}
   599  }
   600  
   601  // deleteEntry removes WorkloadEntry that was created automatically for a workload
   602  // that is using auto-registration.
   603  func (c *Controller) deleteEntry(wle config.Config, periodic bool) {
   604  	if err := c.store.Delete(gvk.WorkloadEntry, wle.Name, wle.Namespace, &wle.ResourceVersion); err != nil && !errors.IsNotFound(err) {
   605  		log.Warnf("failed cleaning up auto-registered WorkloadEntry %s/%s: %v", wle.Namespace, wle.Name, err)
   606  		autoRegistrationErrors.Increment()
   607  		return
   608  	}
   609  	autoRegistrationDeletes.Increment()
   610  	log.Infof("cleaned up auto-registered WorkloadEntry %s/%s periodic:%v", wle.Namespace, wle.Name, periodic)
   611  }
   612  
   613  // deleteHealthCondition updates WorkloadEntry of a workload that is not using auto-registration
   614  // to remove information about the health status (since we can no longer be certain about it).
   615  func (c *Controller) deleteHealthCondition(wle config.Config, periodic bool) {
   616  	err := c.stateStore.DeleteHealthCondition(wle)
   617  	if err != nil {
   618  		log.Warnf("failed cleaning up health-checked WorkloadEntry %s/%s: %v", wle.Namespace, wle.Name, err)
   619  		return
   620  	}
   621  	log.Infof("cleaned up health-checked WorkloadEntry %s/%s periodic:%v", wle.Namespace, wle.Name, periodic)
   622  }
   623  
   624  // IsControllerOf implements state.StoreCallbacks.
   625  func (c *Controller) IsControllerOf(wle *config.Config) bool {
   626  	if wle == nil {
   627  		return false
   628  	}
   629  	return wle.Annotations[annotation.IoIstioWorkloadController.Name] == c.instanceID
   630  }
   631  
   632  func autoregisteredWorkloadEntryName(proxy *model.Proxy) string {
   633  	if proxy.Metadata.AutoRegisterGroup == "" {
   634  		return ""
   635  	}
   636  	if len(proxy.IPAddresses) == 0 {
   637  		log.Errorf("auto-registration of %v failed: missing IP addresses", proxy.ID)
   638  		return ""
   639  	}
   640  	if len(proxy.Metadata.Namespace) == 0 {
   641  		log.Errorf("auto-registration of %v failed: missing namespace", proxy.ID)
   642  		return ""
   643  	}
   644  	p := []string{proxy.Metadata.AutoRegisterGroup, sanitizeIP(proxy.IPAddresses[0])}
   645  	if proxy.Metadata.Network != "" {
   646  		p = append(p, string(proxy.Metadata.Network))
   647  	}
   648  
   649  	name := strings.Join(p, "-")
   650  	if len(name) > 253 {
   651  		name = name[len(name)-253:]
   652  		log.Warnf("generated WorkloadEntry name is too long, consider making the WorkloadGroup name shorter. Shortening from beginning to: %s", name)
   653  	}
   654  	return name
   655  }
   656  
   657  // sanitizeIP ensures an IP address (IPv6) can be used in Kubernetes resource name
   658  func sanitizeIP(s string) string {
   659  	return strings.ReplaceAll(s, ":", "-")
   660  }
   661  
   662  func mergeLabels(labels ...map[string]string) map[string]string {
   663  	if len(labels) == 0 {
   664  		return map[string]string{}
   665  	}
   666  	out := make(map[string]string, len(labels)*len(labels[0]))
   667  	for _, lm := range labels {
   668  		for k, v := range lm {
   669  			out[k] = v
   670  		}
   671  	}
   672  	return out
   673  }
   674  
// workloadGroupIsController is an addressable `true`; its address is stored in the
// Controller field of the OwnerReference set on auto-registered WorkloadEntries.
var workloadGroupIsController = true
   676  
   677  func workloadEntryFromGroup(name string, proxy *model.Proxy, groupCfg *config.Config) *config.Config {
   678  	group := groupCfg.Spec.(*v1alpha3.WorkloadGroup)
   679  	entry := group.Template.DeepCopy()
   680  	entry.Address = proxy.IPAddresses[0]
   681  	// TODO move labels out of entry
   682  	// node metadata > WorkloadGroup.Metadata > WorkloadGroup.Template
   683  	if group.Metadata != nil && group.Metadata.Labels != nil {
   684  		entry.Labels = mergeLabels(entry.Labels, group.Metadata.Labels)
   685  	}
   686  	// Explicitly do not use proxy.Labels, as it is only initialized *after* we register the workload,
   687  	// and it would be circular, as it will set the labels based on the WorkloadEntry -- but we are creating
   688  	// the workload entry.
   689  	if proxy.Metadata.Labels != nil {
   690  		entry.Labels = mergeLabels(entry.Labels, proxy.Metadata.Labels)
   691  		// the label has been converted to "istio-locality: region/zone/subzone"
   692  		// in pilot/pkg/xds/ads.go, and `/` is not allowed in k8s label value.
   693  		// Instead of converting again, we delete it since has set WorkloadEntry.Locality
   694  		delete(entry.Labels, model.LocalityLabel)
   695  	}
   696  
   697  	annotations := map[string]string{annotation.IoIstioAutoRegistrationGroup.Name: groupCfg.Name}
   698  	if group.Metadata != nil && group.Metadata.Annotations != nil {
   699  		annotations = mergeLabels(annotations, group.Metadata.Annotations)
   700  	}
   701  
   702  	if proxy.Metadata.Network != "" {
   703  		entry.Network = string(proxy.Metadata.Network)
   704  	}
   705  	if proxy.Locality != nil {
   706  		entry.Locality = util.LocalityToString(proxy.Locality)
   707  	}
   708  	if proxy.Metadata.ProxyConfig != nil && proxy.Metadata.ProxyConfig.ReadinessProbe != nil {
   709  		annotations[status.WorkloadEntryHealthCheckAnnotation] = "true"
   710  	}
   711  	return &config.Config{
   712  		Meta: config.Meta{
   713  			GroupVersionKind: gvk.WorkloadEntry,
   714  			Name:             name,
   715  			Namespace:        proxy.Metadata.Namespace,
   716  			Labels:           entry.Labels,
   717  			Annotations:      annotations,
   718  			OwnerReferences: []metav1.OwnerReference{{
   719  				APIVersion: groupCfg.GroupVersionKind.GroupVersion(),
   720  				Kind:       groupCfg.GroupVersionKind.Kind,
   721  				Name:       groupCfg.Name,
   722  				UID:        kubetypes.UID(groupCfg.UID),
   723  				Controller: &workloadGroupIsController,
   724  			}},
   725  		},
   726  		Spec: entry,
   727  		// TODO status fields used for garbage collection
   728  		Status: nil,
   729  	}
   730  }
   731  
   732  func isAutoRegisteredWorkloadEntry(wle *config.Config) bool {
   733  	return wle != nil && wle.Annotations[annotation.IoIstioAutoRegistrationGroup.Name] != ""
   734  }
   735  
   736  func isHealthCheckedWorkloadEntry(wle *config.Config) bool {
   737  	return wle != nil && wle.Annotations[annotation.IoIstioWorkloadController.Name] != "" && !isAutoRegisteredWorkloadEntry(wle)
   738  }