github.com/Azure/aad-pod-identity@v1.8.17/pkg/mic/mic.go (about)

     1  package mic
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"os"
     7  	"reflect"
     8  	"sort"
     9  	"strings"
    10  	"sync"
    11  	"sync/atomic"
    12  	"time"
    13  
    14  	aadpodid "github.com/Azure/aad-pod-identity/pkg/apis/aadpodidentity"
    15  	"github.com/Azure/aad-pod-identity/pkg/cloudprovider"
    16  	"github.com/Azure/aad-pod-identity/pkg/crd"
    17  	"github.com/Azure/aad-pod-identity/pkg/filewatcher"
    18  	"github.com/Azure/aad-pod-identity/pkg/metrics"
    19  	"github.com/Azure/aad-pod-identity/pkg/pod"
    20  	"github.com/Azure/aad-pod-identity/pkg/stats"
    21  	"github.com/Azure/aad-pod-identity/pkg/utils"
    22  	"github.com/Azure/aad-pod-identity/version"
    23  
    24  	"github.com/fsnotify/fsnotify"
    25  	"golang.org/x/sync/semaphore"
    26  	corev1 "k8s.io/api/core/v1"
    27  	apierrors "k8s.io/apimachinery/pkg/api/errors"
    28  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    29  	v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    30  	"k8s.io/client-go/informers"
    31  	"k8s.io/client-go/kubernetes"
    32  	"k8s.io/client-go/kubernetes/scheme"
    33  	typedcorev1 "k8s.io/client-go/kubernetes/typed/core/v1"
    34  	"k8s.io/client-go/rest"
    35  	"k8s.io/client-go/tools/leaderelection"
    36  	"k8s.io/client-go/tools/leaderelection/resourcelock"
    37  	"k8s.io/client-go/tools/record"
    38  	"k8s.io/klog/v2"
    39  )
    40  
    41  const (
    42  	stopped = int32(0)
    43  	running = int32(1)
    44  )
    45  
    46  // NodeGetter is an abstraction used to get Kubernetes node info.
    47  type NodeGetter interface {
    48  	Get(name string) (*corev1.Node, error)
    49  	Start(<-chan struct{})
    50  }
    51  
    52  // TypeUpgradeConfig - configuration aspects of type related changes required for client-go upgrade.
    53  type TypeUpgradeConfig struct {
    54  	// Key in the config map which indicates if a type upgrade has been performed.
    55  	TypeUpgradeStatusKey string
    56  	EnableTypeUpgrade    bool
    57  }
    58  
    59  // CMConfig - config map for aad-pod-identity
    60  type CMConfig struct {
    61  	Namespace string
    62  	Name      string
    63  }
    64  
    65  // LeaderElectionConfig - used to keep track of leader election config.
    66  type LeaderElectionConfig struct {
    67  	Namespace string
    68  	Name      string
    69  	Duration  time.Duration
    70  	Instance  string
    71  }
    72  
    73  // UpdateUserMSIConfig - parameters for retrying cloudprovider's UpdateUserMSI function
    74  type UpdateUserMSIConfig struct {
    75  	MaxRetry      int
    76  	RetryInterval time.Duration
    77  }
    78  
    79  // Client has the required pointers to talk to the api server
    80  // and interact with the CRD related data structure.
    81  type Client struct {
    82  	CRDClient                           crd.ClientInt
    83  	CloudClient                         cloudprovider.ClientInt
    84  	PodClient                           pod.ClientInt
    85  	CloudConfigWatcher                  filewatcher.ClientInt
    86  	EventRecorder                       record.EventRecorder
    87  	EventChannel                        chan aadpodid.EventType
    88  	NodeClient                          NodeGetter
    89  	IsNamespaced                        bool
    90  	SyncLoopStarted                     bool
    91  	syncRetryInterval                   time.Duration
    92  	createDeleteBatch                   int64
    93  	ImmutableUserMSIsMap                map[string]bool
    94  	identityAssignmentReconcileInterval time.Duration
    95  
    96  	syncing int32 // protect against concurrent sync's
    97  
    98  	leaderElector *leaderelection.LeaderElector
    99  	*LeaderElectionConfig
   100  	Reporter       *metrics.Reporter
   101  	TypeUpgradeCfg *TypeUpgradeConfig
   102  	CMCfg          *CMConfig
   103  	CMClient       typedcorev1.ConfigMapInterface
   104  }
   105  
   106  // Config - MIC Config
   107  type Config struct {
   108  	CloudCfgPath                        string
   109  	RestConfig                          *rest.Config
   110  	IsNamespaced                        bool
   111  	SyncRetryInterval                   time.Duration
   112  	LeaderElectionCfg                   *LeaderElectionConfig
   113  	CreateDeleteBatch                   int64
   114  	ImmutableUserMSIsList               []string
   115  	CMcfg                               *CMConfig
   116  	TypeUpgradeCfg                      *TypeUpgradeConfig
   117  	UpdateUserMSICfg                    *UpdateUserMSIConfig
   118  	IdentityAssignmentReconcileInterval time.Duration
   119  }
   120  
   121  // ClientInt is an abstraction used to perform an MIC sync cycle.
   122  type ClientInt interface {
   123  	Start(exit <-chan struct{})
   124  	Sync(exit <-chan struct{})
   125  }
   126  
   127  type trackUserAssignedMSIIds struct {
   128  	addUserAssignedMSIIDs    []string
   129  	removeUserAssignedMSIIDs []string
   130  	assignedIDsToCreate      []aadpodid.AzureAssignedIdentity
   131  	assignedIDsToDelete      []aadpodid.AzureAssignedIdentity
   132  	assignedIDsToUpdate      []aadpodid.AzureAssignedIdentity
   133  	isvmss                   bool
   134  }
   135  
   136  // NewMICClient returns new mic client
   137  func NewMICClient(cfg *Config) (*Client, error) {
   138  	klog.Infof("starting to create the pod identity client. Version: %v. Build date: %v", version.MICVersion, version.BuildDate)
   139  
   140  	clientSet := kubernetes.NewForConfigOrDie(cfg.RestConfig)
   141  
   142  	k8sVersion, err := clientSet.ServerVersion()
   143  	if err == nil {
   144  		klog.Infof("Kubernetes server version: %s", k8sVersion.String())
   145  	}
   146  
   147  	informer := informers.NewSharedInformerFactory(clientSet, 30*time.Second)
   148  
   149  	cloudClient, err := cloudprovider.NewCloudProvider(cfg.CloudCfgPath, cfg.UpdateUserMSICfg.MaxRetry, cfg.UpdateUserMSICfg.RetryInterval)
   150  	if err != nil {
   151  		return nil, err
   152  	}
   153  	klog.V(1).Infof("cloud provider initialized")
   154  
   155  	eventCh := make(chan aadpodid.EventType, 100)
   156  
   157  	crdClient, err := crd.NewCRDClient(cfg.RestConfig, eventCh)
   158  	if err != nil {
   159  		return nil, err
   160  	}
   161  	klog.V(1).Infof("CRD client initialized")
   162  
   163  	podClient := pod.NewPodClient(informer, eventCh)
   164  	klog.V(1).Infof("pod Client initialized")
   165  
   166  	cloudConfigWatcher, err := filewatcher.NewFileWatcher(
   167  		func(event fsnotify.Event) {
   168  			if event.Op&fsnotify.Write == fsnotify.Write {
   169  				if err := cloudClient.Init(); err != nil {
   170  					return
   171  				}
   172  				klog.V(1).Infof("cloud provider re-initialized")
   173  			}
   174  		}, func(err error) {
   175  			klog.Errorf("failed to handle fsnotify event, error: %+v", err)
   176  		})
   177  	if err != nil {
   178  		return nil, err
   179  	}
   180  	if err := cloudConfigWatcher.Add(cfg.CloudCfgPath); err != nil {
   181  		return nil, err
   182  	}
   183  	klog.V(1).Infof("cloud config watcher initialized")
   184  
   185  	eventBroadcaster := record.NewBroadcaster()
   186  	eventBroadcaster.StartRecordingToSink(&typedcorev1.EventSinkImpl{Interface: clientSet.CoreV1().Events("")})
   187  	recorder := eventBroadcaster.NewRecorder(scheme.Scheme, corev1.EventSource{Component: aadpodid.CRDGroup})
   188  
   189  	immutableUserMSIsMap := make(map[string]bool)
   190  	if len(cfg.ImmutableUserMSIsList) > 0 {
   191  		for _, item := range cfg.ImmutableUserMSIsList {
   192  			immutableUserMSIsMap[strings.ToLower(item)] = true
   193  		}
   194  	}
   195  	// Cluster identity used for cloud provider operations is also immutable.
   196  	// For clusters created with managed identity, the cluster identity is used for all
   197  	// cloud provider operations and is also used by MIC. If the user configures the cluster
   198  	// identity to be used by pod, we should not delete it when all pods are deleted.
   199  	clusterIdentity := cloudClient.GetClusterIdentity()
   200  	if clusterIdentity != "" {
   201  		immutableUserMSIsMap[clusterIdentity] = true
   202  	}
   203  	var cmClient typedcorev1.ConfigMapInterface
   204  	if cfg.TypeUpgradeCfg.EnableTypeUpgrade {
   205  		cmClient = clientSet.CoreV1().ConfigMaps(cfg.CMcfg.Namespace)
   206  	}
   207  
   208  	c := &Client{
   209  		CRDClient:                           crdClient,
   210  		CloudClient:                         cloudClient,
   211  		PodClient:                           podClient,
   212  		CloudConfigWatcher:                  cloudConfigWatcher,
   213  		EventRecorder:                       recorder,
   214  		EventChannel:                        eventCh,
   215  		NodeClient:                          &NodeClient{informer.Core().V1().Nodes()},
   216  		IsNamespaced:                        cfg.IsNamespaced,
   217  		syncRetryInterval:                   cfg.SyncRetryInterval,
   218  		createDeleteBatch:                   cfg.CreateDeleteBatch,
   219  		ImmutableUserMSIsMap:                immutableUserMSIsMap,
   220  		TypeUpgradeCfg:                      cfg.TypeUpgradeCfg,
   221  		CMCfg:                               cfg.CMcfg,
   222  		CMClient:                            cmClient,
   223  		identityAssignmentReconcileInterval: cfg.IdentityAssignmentReconcileInterval,
   224  	}
   225  
   226  	leaderElector, err := c.NewLeaderElector(clientSet, recorder, cfg.LeaderElectionCfg)
   227  	if err != nil {
   228  		return nil, fmt.Errorf("failed to create new leader elector, error: %+v", err)
   229  	}
   230  	c.leaderElector = leaderElector
   231  
   232  	reporter, err := metrics.NewReporter()
   233  	if err != nil {
   234  		return nil, fmt.Errorf("failed to create reporter for metrics, error: %+v", err)
   235  	}
   236  	c.Reporter = reporter
   237  	return c, nil
   238  }
   239  
   240  // Run - Initiates the leader election run call to find if its leader and run it
   241  func (c *Client) Run() {
   242  	klog.Info("initiating MIC Leader election")
   243  	// counter to track number of mic election
   244  	c.Reporter.Report(metrics.MICNewLeaderElectionCountM.M(1))
   245  	c.leaderElector.Run(context.Background())
   246  }
   247  
   248  // NewLeaderElector - does the required leader election initialization
   249  func (c *Client) NewLeaderElector(clientSet *kubernetes.Clientset, recorder record.EventRecorder, leaderElectionConfig *LeaderElectionConfig) (*leaderelection.LeaderElector, error) {
   250  	c.LeaderElectionConfig = leaderElectionConfig
   251  	resourceLock, err := resourcelock.New(resourcelock.EndpointsResourceLock,
   252  		c.Namespace,
   253  		c.Name,
   254  		clientSet.CoreV1(),
   255  		clientSet.CoordinationV1(),
   256  		resourcelock.ResourceLockConfig{
   257  			Identity:      c.Instance,
   258  			EventRecorder: recorder})
   259  	if err != nil {
   260  		return nil, fmt.Errorf("failed to create resource lock for leader election, error: %+v", err)
   261  	}
   262  	config := leaderelection.LeaderElectionConfig{
   263  		LeaseDuration: c.Duration,
   264  		RenewDeadline: c.Duration / 2,
   265  		RetryPeriod:   c.Duration / 4,
   266  		Callbacks: leaderelection.LeaderCallbacks{
   267  			OnStartedLeading: func(ctx context.Context) {
   268  				c.Start(ctx.Done())
   269  			},
   270  			OnStoppedLeading: func() {
   271  				klog.Error("lost leader lease")
   272  				klog.Flush()
   273  				os.Exit(1)
   274  			},
   275  		},
   276  		Lock: resourceLock,
   277  	}
   278  
   279  	leaderElector, err := leaderelection.NewLeaderElector(config)
   280  	if err != nil {
   281  		return nil, err
   282  	}
   283  	return leaderElector, nil
   284  }
   285  
   286  // UpgradeTypeIfRequired performs type upgrade for all aad-pod-identity CRDs if required.
   287  func (c *Client) UpgradeTypeIfRequired() error {
   288  	if c.TypeUpgradeCfg.EnableTypeUpgrade {
   289  		cm, err := c.CMClient.Get(context.TODO(), c.CMCfg.Name, v1.GetOptions{})
   290  		// If we get an error and its not NotFound then return, because we cannot proceed.
   291  		if err != nil && !apierrors.IsNotFound(err) {
   292  			return fmt.Errorf("failed to get ConfigMap %s/%s, error: %+v", c.CMCfg.Namespace, c.CMCfg.Name, err)
   293  		}
   294  
   295  		// Now either the configmap is not there or we successfully got the configmap
   296  		// Handle the case where the configmap is not found.
   297  		if err != nil && apierrors.IsNotFound(err) {
   298  			// Create the configmap
   299  			newCfgMap := &corev1.ConfigMap{
   300  				ObjectMeta: v1.ObjectMeta{
   301  					Namespace: c.CMCfg.Namespace,
   302  					Name:      c.CMCfg.Name,
   303  				},
   304  			}
   305  			if cm, err = c.CMClient.Create(context.TODO(), newCfgMap, metav1.CreateOptions{}); err != nil {
   306  				return fmt.Errorf("failed to create ConfigMap %s/%s, error: %+v", c.CMCfg.Namespace, c.CMCfg.Name, err)
   307  			}
   308  		}
   309  
   310  		// We reach here only if the configmap is present or we created new one.
   311  		// Check if the key for type upgrade is present. If the key is present,
   312  		// then the upgrade is already performed. If not then go through the type upgrade
   313  		// process.
   314  		if v, ok := cm.Data[c.TypeUpgradeCfg.TypeUpgradeStatusKey]; !ok {
   315  			klog.Infof("upgrading the types to work with case sensitive go-client")
   316  			if err := c.CRDClient.UpgradeAll(); err != nil {
   317  				return fmt.Errorf("failed to upgrade type, error: %+v", err)
   318  			}
   319  			klog.Infof("type upgrade completed !!")
   320  			// Upgrade completed so update the data with the upgrade key.
   321  			if cm.Data == nil {
   322  				cm.Data = make(map[string]string)
   323  			}
   324  			cm.Data[c.TypeUpgradeCfg.TypeUpgradeStatusKey] = version.MICVersion
   325  			_, err = c.CMClient.Update(context.TODO(), cm, metav1.UpdateOptions{})
   326  			if err != nil {
   327  				return fmt.Errorf("failed to update ConfigMap key %s failed, error: %+v", c.TypeUpgradeCfg.TypeUpgradeStatusKey, err)
   328  			}
   329  		} else {
   330  			klog.Infof("type upgrade status configmap found from version: %s. Skipping type upgrade!", v)
   331  		}
   332  	}
   333  	return nil
   334  }
   335  
   336  // Start starts various go routines to watch for any relevant changes that would trigger a MIC sync.
   337  func (c *Client) Start(exit <-chan struct{}) {
   338  	klog.V(6).Infof("MIC client starting..")
   339  
   340  	if err := c.UpgradeTypeIfRequired(); err != nil {
   341  		klog.Fatalf("type upgrade failed with error: %+v", err)
   342  		return
   343  	}
   344  
   345  	var wg sync.WaitGroup
   346  
   347  	wg.Add(1)
   348  	go func() {
   349  		c.PodClient.Start(exit)
   350  		klog.V(6).Infof("pod client started")
   351  		wg.Done()
   352  	}()
   353  
   354  	wg.Add(1)
   355  	go func() {
   356  		c.CRDClient.Start(exit)
   357  		klog.V(6).Infof("CRD client started")
   358  		wg.Done()
   359  	}()
   360  
   361  	wg.Add(1)
   362  	go func() {
   363  		c.NodeClient.Start(exit)
   364  		klog.V(6).Infof("node client started")
   365  		wg.Done()
   366  	}()
   367  
   368  	wg.Add(1)
   369  	go func() {
   370  		c.CloudConfigWatcher.Start(exit)
   371  		klog.V(6).Infof("cloud config watcher started")
   372  		wg.Done()
   373  	}()
   374  
   375  	wg.Wait()
   376  	go c.Sync(exit)
   377  }
   378  
   379  func (c *Client) canSync() bool {
   380  	return atomic.CompareAndSwapInt32(&c.syncing, stopped, running)
   381  }
   382  
   383  func (c *Client) setStopped() {
   384  	atomic.StoreInt32(&c.syncing, stopped)
   385  }
   386  
   387  // Sync perform a sync cycle.
   388  func (c *Client) Sync(exit <-chan struct{}) {
   389  	if !c.canSync() {
   390  		panic("concurrent syncs")
   391  	}
   392  	defer c.setStopped()
   393  
   394  	ticker := time.NewTicker(c.syncRetryInterval)
   395  	defer ticker.Stop()
   396  
   397  	identityAssignmentReconcileTicker := time.NewTicker(c.identityAssignmentReconcileInterval)
   398  	defer identityAssignmentReconcileTicker.Stop()
   399  
   400  	klog.Info("sync thread started.")
   401  	c.SyncLoopStarted = true
   402  	var event aadpodid.EventType
   403  	totalWorkDoneCycles := 0
   404  	totalSyncCycles := 0
   405  
   406  	for {
   407  		select {
   408  		case <-exit:
   409  			return
   410  		case event = <-c.EventChannel:
   411  			klog.V(6).Infof("received event: %v", event)
   412  		case <-ticker.C:
   413  			klog.V(6).Infof("running periodic sync loop")
   414  		case <-identityAssignmentReconcileTicker.C:
   415  			klog.V(6).Infof("reconciling identity assignment on Azure")
   416  			c.reconcileIdentityAssignment()
   417  			continue
   418  		}
   419  		totalSyncCycles++
   420  		stats.Init()
   421  		// This is the only place where the AzureAssignedIdentity creation is initiated.
   422  		begin := time.Now()
   423  		workDone := false
   424  
   425  		cacheTime := time.Now()
   426  
   427  		// There is a delay in data propagation to cache. It's possible that the creates performed in the previous sync cycle
   428  		// are not propagated before this sync cycle began. In order to avoid redoing the cycle, we sync cache again.
   429  		c.CRDClient.SyncCacheAll(exit, false)
   430  		stats.Put(stats.CacheSync, time.Since(cacheTime))
   431  
   432  		// List all pods in all namespaces
   433  		systemTime := time.Now()
   434  		listPods, err := c.PodClient.GetPods()
   435  		if err != nil {
   436  			klog.Errorf("failed to list pods, error: %+v", err)
   437  			continue
   438  		}
   439  		listBindings, err := c.CRDClient.ListBindings()
   440  		if err != nil {
   441  			continue
   442  		}
   443  		klog.V(6).Infof("number of bindings: %d", len(*listBindings))
   444  		listIDs, err := c.CRDClient.ListIds()
   445  		if err != nil {
   446  			continue
   447  		}
   448  		klog.V(6).Infof("number of identities: %d", len(*listIDs))
   449  		idMap, err := c.convertIDListToMap(*listIDs)
   450  		if err != nil {
   451  			klog.Errorf("failed to convert ID list to map, error: %+v", err)
   452  			continue
   453  		}
   454  
   455  		currentAssignedIDs, err := c.CRDClient.ListAssignedIDsInMap()
   456  		if err != nil {
   457  			continue
   458  		}
   459  		klog.V(6).Infof("number of assigned identities: %d", len(currentAssignedIDs))
   460  		stats.Put(stats.System, time.Since(systemTime))
   461  
   462  		beginNewListTime := time.Now()
   463  		newAssignedIDs, nodeRefs, err := c.createDesiredAssignedIdentityList(listPods, listBindings, idMap)
   464  		if err != nil {
   465  			klog.Errorf("failed to create a list of desired AzureAssignedIdentity, error: %+v", err)
   466  			continue
   467  		}
   468  		stats.Put(stats.CurrentState, time.Since(beginNewListTime))
   469  
   470  		// Extract add list and delete list based on existing assigned ids in the system (currentAssignedIDs).
   471  		// and the ones we have arrived at in the volatile list (newAssignedIDs).
   472  		addList, err := c.getAzureAssignedIDsToCreate(currentAssignedIDs, newAssignedIDs)
   473  		if err != nil {
   474  			klog.Errorf("failed to get a list of AzureAssignedIdentities to create, error: %+v", err)
   475  			continue
   476  		}
   477  		deleteList, err := c.getAzureAssignedIDsToDelete(currentAssignedIDs, newAssignedIDs)
   478  		if err != nil {
   479  			klog.Errorf("failed to get a list of AzureAssignedIdentities to delete, error: %+v", err)
   480  			continue
   481  		}
   482  		beforeUpdateList, afterUpdateList := c.getAzureAssignedIdentitiesToUpdate(addList, deleteList)
   483  		klog.V(5).Infof("del: %v, add: %v, update: %v", deleteList, addList, afterUpdateList)
   484  
   485  		// the node map is used to track assigned ids to create/delete, identities to assign/remove
   486  		// for each node or vmss
   487  		nodeMap := make(map[string]trackUserAssignedMSIIds)
   488  
   489  		// separate the add, delete and update list per node
   490  		c.convertAssignedIDListToMap(addList, deleteList, afterUpdateList, nodeMap)
   491  
   492  		// process the delete and add list
   493  		// determine the list of identities that need to updated, create a node to identity list mapping for add and delete
   494  		if len(deleteList) > 0 || len(beforeUpdateList) > 0 {
   495  			workDone = true
   496  			c.getListOfIdsToDelete(deleteList, beforeUpdateList, afterUpdateList, newAssignedIDs, nodeMap, nodeRefs)
   497  		}
   498  		if len(addList) > 0 || len(afterUpdateList) > 0 {
   499  			workDone = true
   500  			c.getListOfIdsToAssign(addList, afterUpdateList, nodeMap)
   501  		}
   502  
   503  		var wg sync.WaitGroup
   504  
   505  		// check if vmss and consolidate vmss nodes into vmss if necessary
   506  		c.consolidateVMSSNodes(nodeMap, &wg)
   507  
   508  		// one final createorupdate to each node or vmss in the map
   509  		c.updateNodeAndDeps(newAssignedIDs, nodeMap, nodeRefs, &wg)
   510  
   511  		wg.Wait()
   512  
   513  		if workDone || ((totalSyncCycles % 1000) == 0) {
   514  			if workDone {
   515  				totalWorkDoneCycles++
   516  			}
   517  			idsFound := 0
   518  			bindingsFound := 0
   519  			if listIDs != nil {
   520  				idsFound = len(*listIDs)
   521  			}
   522  			if listBindings != nil {
   523  				bindingsFound = len(*listBindings)
   524  			}
   525  			klog.Infof("work done: %v. Found %d pods, %d ids, %d bindings", workDone, len(listPods), idsFound, bindingsFound)
   526  			klog.Infof("total work cycles: %d, out of which work was done in: %d", totalSyncCycles, totalWorkDoneCycles)
   527  			stats.Put(stats.Total, time.Since(begin))
   528  
   529  			c.Reporter.Report(
   530  				metrics.MICCycleCountM.M(1),
   531  				metrics.MICCycleDurationM.M(metrics.SinceInSeconds(begin)))
   532  
   533  			stats.PrintSync()
   534  			if workDone {
   535  				// We need to synchronize the cache inorder to get the latest updates.
   536  				// Even though we sync at the beginning of every cycle, we are still seeing
   537  				// conflicts indicating the assigned identities are not reflecting in
   538  				// the cache. Continue to use the sleep workaround.
   539  				time.Sleep(time.Millisecond * 200)
   540  			}
   541  		}
   542  	}
   543  }
   544  
   545  func (c *Client) convertAssignedIDListToMap(addList, deleteList, updateList map[string]aadpodid.AzureAssignedIdentity, nodeMap map[string]trackUserAssignedMSIIds) {
   546  	for _, createID := range addList {
   547  		if trackList, ok := nodeMap[createID.Spec.NodeName]; ok {
   548  			trackList.assignedIDsToCreate = append(trackList.assignedIDsToCreate, createID)
   549  			nodeMap[createID.Spec.NodeName] = trackList
   550  			continue
   551  		}
   552  		nodeMap[createID.Spec.NodeName] = trackUserAssignedMSIIds{assignedIDsToCreate: []aadpodid.AzureAssignedIdentity{createID}}
   553  	}
   554  
   555  	for _, delID := range deleteList {
   556  		if trackList, ok := nodeMap[delID.Spec.NodeName]; ok {
   557  			trackList.assignedIDsToDelete = append(trackList.assignedIDsToDelete, delID)
   558  			nodeMap[delID.Spec.NodeName] = trackList
   559  			continue
   560  		}
   561  		nodeMap[delID.Spec.NodeName] = trackUserAssignedMSIIds{assignedIDsToDelete: []aadpodid.AzureAssignedIdentity{delID}}
   562  	}
   563  
   564  	for _, updateID := range updateList {
   565  		if trackList, ok := nodeMap[updateID.Spec.NodeName]; ok {
   566  			trackList.assignedIDsToUpdate = append(trackList.assignedIDsToUpdate, updateID)
   567  			nodeMap[updateID.Spec.NodeName] = trackList
   568  			continue
   569  		}
   570  		nodeMap[updateID.Spec.NodeName] = trackUserAssignedMSIIds{assignedIDsToUpdate: []aadpodid.AzureAssignedIdentity{updateID}}
   571  	}
   572  }
   573  
   574  func (c *Client) createDesiredAssignedIdentityList(
   575  	listPods []*corev1.Pod, listBindings *[]aadpodid.AzureIdentityBinding, idMap map[string]aadpodid.AzureIdentity) (map[string]aadpodid.AzureAssignedIdentity, map[string]bool, error) {
   576  	// For each pod, check what bindings are matching. For each binding create volatile azure assigned identity.
   577  	// Compare this list with the current list of azure assigned identities.
   578  	// For any new assigned identities found in this volatile list, create assigned identity and assign user assigned msis.
   579  	// For any assigned ids not present the volatile list, proceed with the deletion.
   580  	nodeRefs := make(map[string]bool)
   581  	newAssignedIDs := make(map[string]aadpodid.AzureAssignedIdentity)
   582  
   583  	for _, pod := range listPods {
   584  		klog.V(6).Infof("checking pod %s/%s", pod.Namespace, pod.Name)
   585  		if pod.Spec.NodeName == "" {
   586  			// Node is not yet allocated. In that case skip the pod
   587  			klog.Infof("pod %s/%s has no assigned node yet. it will be ignored", pod.Namespace, pod.Name)
   588  			continue
   589  		}
   590  		crdPodLabelVal := pod.Labels[aadpodid.CRDLabelKey]
   591  		klog.V(6).Infof("pod: %s/%s. Label value: %v", pod.Namespace, pod.Name, crdPodLabelVal)
   592  		if crdPodLabelVal == "" {
   593  			// No binding mentioned in the label. Just continue to the next pod
   594  			klog.Infof("pod %s/%s doesn't contain %s label field. it will be ignored", pod.Namespace, pod.Name, aadpodid.CRDLabelKey)
   595  			continue
   596  		}
   597  		var matchedBindings []aadpodid.AzureIdentityBinding
   598  		for _, allBinding := range *listBindings {
   599  			klog.V(6).Infof("check the binding (pod - %s/%s): %s", pod.Namespace, pod.Name, allBinding.Spec.Selector)
   600  			if allBinding.Spec.Selector == crdPodLabelVal {
   601  				klog.V(5).Infof("found binding match for pod %s/%s with binding %s/%s", pod.Namespace, pod.Name, allBinding.Namespace, allBinding.Name)
   602  				matchedBindings = append(matchedBindings, allBinding)
   603  				nodeRefs[pod.Spec.NodeName] = true
   604  			}
   605  		}
   606  
   607  		if len(matchedBindings) == 0 {
   608  			klog.Infof("No AzureIdentityBinding found for pod %s/%s that matches selector: %s. it will be ignored", pod.Namespace, pod.Name, crdPodLabelVal)
   609  			continue
   610  		}
   611  
   612  		// sort all matching bindings so we can iterate the slice
   613  		// in an deterministic fashion in different sync cycles
   614  		sort.Sort(aadpodid.AzureIdentityBindings(matchedBindings))
   615  
   616  		for _, binding := range matchedBindings {
   617  			klog.V(5).Infof("looking up id map: %s/%s", binding.Namespace, binding.Spec.AzureIdentity)
   618  			if azureID, idPresent := idMap[getIDKey(binding.Namespace, binding.Spec.AzureIdentity)]; idPresent {
   619  				// working in Namespaced mode or this specific identity is namespaced
   620  				if c.IsNamespaced || aadpodid.IsNamespacedIdentity(&azureID) {
   621  					// They have to match all
   622  					if !(azureID.Namespace == binding.Namespace && binding.Namespace == pod.Namespace) {
   623  						klog.V(5).Infof("identity %s/%s was matched via binding %s/%s to %s/%s but namespaced identity is enforced, so it will be ignored",
   624  							azureID.Namespace, azureID.Name, binding.Namespace, binding.Name, pod.Namespace, pod.Name)
   625  						continue
   626  					}
   627  				}
   628  				klog.V(5).Infof("identity %s/%s assigned to %s/%s via %s/%s", azureID.Namespace, azureID.Name, pod.Namespace, pod.Name, binding.Namespace, binding.Name)
   629  				assignedID, err := c.makeAssignedIDs(azureID, binding, pod.Name, pod.Namespace, pod.Spec.NodeName)
   630  
   631  				if err != nil {
   632  					klog.Errorf("failed to create an AzureAssignedIdentity between pod %s/%s and AzureIdentity %s/%s, error: %+v", pod.Namespace, pod.Name, azureID.Namespace, azureID.Name, err)
   633  					continue
   634  				}
   635  
   636  				if a, ok := newAssignedIDs[assignedID.Name]; ok {
   637  					// see https://github.com/Azure/aad-pod-identity/issues/1065
   638  					klog.Warningf("AzureIdentity %s exists in both %s and %s namespace. Considering renaming it or enabling Namespace mode (https://azure.github.io/aad-pod-identity/docs/configure/match_pods_in_namespace)",
   639  						azureID.Name, a.Spec.AzureIdentityRef.Namespace, azureID.Namespace)
   640  				} else {
   641  					newAssignedIDs[assignedID.Name] = *assignedID
   642  				}
   643  			} else {
   644  				// This is the case where the identity has been deleted.
   645  				// In such a case, we will skip it from matching binding.
   646  				// This will ensure that the new assigned ids created will not have the
   647  				// one associated with this azure identity.
   648  				klog.Infof("%s identity not found when using %s/%s binding", binding.Spec.AzureIdentity, binding.Namespace, binding.Name)
   649  			}
   650  		}
   651  	}
   652  	return newAssignedIDs, nodeRefs, nil
   653  }
   654  
   655  // getListOfIdsToDelete will go over the delete list to determine if the id is required to be deleted
   656  // only user assigned identity not in use are added to the remove list for the node
   657  func (c *Client) getListOfIdsToDelete(deleteList, beforeUpdateList, afterUpdateList, newAssignedIDs map[string]aadpodid.AzureAssignedIdentity,
   658  	nodeMap map[string]trackUserAssignedMSIIds,
   659  	nodeRefs map[string]bool) {
   660  	vmssGroups, err := getVMSSGroups(c.NodeClient, nodeRefs)
   661  	if err != nil {
   662  		klog.Errorf("failed to get VMSS groups, error: %+v", err)
   663  		return
   664  	}
   665  
   666  	consolidatedMapToCheck := make(map[string]aadpodid.AzureAssignedIdentity)
   667  	for name, id := range newAssignedIDs {
   668  		consolidatedMapToCheck[name] = id
   669  	}
   670  	for name, id := range afterUpdateList {
   671  		consolidatedMapToCheck[name] = id
   672  	}
   673  
   674  	for _, delID := range deleteList {
   675  		err := c.shouldRemoveID(delID, consolidatedMapToCheck, nodeMap, vmssGroups)
   676  		if err != nil {
   677  			klog.Errorf("failed to check if identity should be removed, error: %+v", err)
   678  		}
   679  	}
   680  	// this loop checks the azure identity before it was updated and cleans up
   681  	// the old identity
   682  	for _, oldUpdateID := range beforeUpdateList {
   683  		err := c.shouldRemoveID(oldUpdateID, consolidatedMapToCheck, nodeMap, vmssGroups)
   684  		if err != nil {
   685  			klog.Errorf("failed to check if identity should be removed, error: %+v", err)
   686  		}
   687  	}
   688  }
   689  
   690  // getListOfIdsToAssign will add the id to the append list for node if it's user assigned identity
   691  func (c *Client) getListOfIdsToAssign(addList, updateList map[string]aadpodid.AzureAssignedIdentity, nodeMap map[string]trackUserAssignedMSIIds) {
   692  	for _, createID := range addList {
   693  		c.shouldAssignID(createID, nodeMap)
   694  	}
   695  	for _, updateID := range updateList {
   696  		c.shouldAssignID(updateID, nodeMap)
   697  	}
   698  }
   699  
   700  func (c *Client) shouldAssignID(assignedID aadpodid.AzureAssignedIdentity, nodeMap map[string]trackUserAssignedMSIIds) {
   701  	id := assignedID.Spec.AzureIdentityRef
   702  	isUserAssignedMSI := c.checkIfUserAssignedMSI(*id)
   703  
   704  	if assignedID.Status.Status == "" || assignedID.Status.Status == aadpodid.AssignedIDCreated {
   705  		if isUserAssignedMSI {
   706  			c.appendToAddListForNode(id.Spec.ResourceID, assignedID.Spec.NodeName, nodeMap)
   707  		}
   708  	}
   709  	klog.V(5).Infof("binding applied: %+v", assignedID.Spec.AzureBindingRef)
   710  }
   711  
   712  func (c *Client) shouldRemoveID(assignedID aadpodid.AzureAssignedIdentity,
   713  	newAssignedIDs map[string]aadpodid.AzureAssignedIdentity,
   714  	nodeMap map[string]trackUserAssignedMSIIds, vmssGroups *vmssGroupList) error {
   715  	klog.V(5).Infof("deletion of id: %s", assignedID.Name)
   716  	inUse, err := c.checkIfInUse(assignedID, newAssignedIDs, vmssGroups)
   717  	if err != nil {
   718  		return err
   719  	}
   720  
   721  	id := assignedID.Spec.AzureIdentityRef
   722  	isUserAssignedMSI := c.checkIfUserAssignedMSI(*id)
   723  	isImmutableIdentity := c.checkIfIdentityImmutable(id.Spec.ClientID)
   724  	// this case includes Assigned state and empty state to ensure backward compatibility
   725  	if assignedID.Status.Status == aadpodid.AssignedIDAssigned || assignedID.Status.Status == "" {
   726  		// only user assigned identities that are not in use and are not defined as
   727  		// immutable will be removed from underlying node/vmss
   728  		if !inUse && isUserAssignedMSI && !isImmutableIdentity {
   729  			c.appendToRemoveListForNode(id.Spec.ResourceID, assignedID.Spec.NodeName, nodeMap)
   730  		}
   731  	}
   732  	klog.V(5).Infof("binding removed: %+v", assignedID.Spec.AzureBindingRef)
   733  	return nil
   734  }
   735  
   736  func (c *Client) matchAssignedID(x aadpodid.AzureAssignedIdentity, y aadpodid.AzureAssignedIdentity) bool {
   737  	bindingX := x.Spec.AzureBindingRef
   738  	bindingY := y.Spec.AzureBindingRef
   739  
   740  	idX := x.Spec.AzureIdentityRef
   741  	idY := y.Spec.AzureIdentityRef
   742  
   743  	klog.V(7).Infof("assignedidX - %+v\n", x)
   744  	klog.V(7).Infof("assignedidY - %+v\n", y)
   745  
   746  	klog.V(7).Infof("bindingX - %+v\n", bindingX)
   747  	klog.V(7).Infof("bindingY - %+v\n", bindingY)
   748  
   749  	klog.V(7).Infof("idX - %+v\n", idX)
   750  	klog.V(7).Infof("idY - %+v\n", idY)
   751  
   752  	return bindingX.Name == bindingY.Name &&
   753  		bindingX.ResourceVersion == bindingY.ResourceVersion &&
   754  		idX.Name == idY.Name &&
   755  		idX.ResourceVersion == idY.ResourceVersion &&
   756  		x.Spec.Pod == y.Spec.Pod &&
   757  		x.Spec.PodNamespace == y.Spec.PodNamespace &&
   758  		x.Spec.NodeName == y.Spec.NodeName
   759  }
   760  
   761  func (c *Client) getAzureAssignedIDsToCreate(old, new map[string]aadpodid.AzureAssignedIdentity) (map[string]aadpodid.AzureAssignedIdentity, error) {
   762  	// everything in new needs to be created
   763  	if len(old) == 0 {
   764  		return new, nil
   765  	}
   766  
   767  	create := make(map[string]aadpodid.AzureAssignedIdentity)
   768  	begin := time.Now()
   769  
   770  	for assignedIDName, newAssignedID := range new {
   771  		oldAssignedID, exists := old[assignedIDName]
   772  		idMatch := false
   773  		if exists {
   774  			idMatch = c.matchAssignedID(oldAssignedID, newAssignedID)
   775  		}
   776  		if idMatch && oldAssignedID.Status.Status == aadpodid.AssignedIDCreated {
   777  			// if the old assigned id is in created state, then the identity assignment to the node
   778  			// is not done. Adding to the list will ensure we retry identity assignment to node for
   779  			// this assigned identity.
   780  			klog.V(5).Infof("ok: %v, Create added: %s as assignedID in CREATED state", idMatch, assignedIDName)
   781  			create[assignedIDName] = oldAssignedID
   782  		}
   783  		if !idMatch {
   784  			// We are done checking that this new id is not present in the old
   785  			// list. So we will add it to the create list.
   786  			klog.V(5).Infof("ok: %v, Create added: %s", idMatch, assignedIDName)
   787  			create[assignedIDName] = newAssignedID
   788  		}
   789  	}
   790  	stats.Put(stats.FindAzureAssignedIdentitiesToCreate, time.Since(begin))
   791  	return create, nil
   792  }
   793  
   794  func (c *Client) getAzureAssignedIDsToDelete(old, new map[string]aadpodid.AzureAssignedIdentity) (map[string]aadpodid.AzureAssignedIdentity, error) {
   795  	delete := make(map[string]aadpodid.AzureAssignedIdentity)
   796  	// nothing to delete
   797  	if len(old) == 0 {
   798  		return delete, nil
   799  	}
   800  	// delete everything as nothing in new
   801  	if len(new) == 0 {
   802  		return old, nil
   803  	}
   804  
   805  	begin := time.Now()
   806  	for assignedIDName, oldAssignedID := range old {
   807  		newAssignedID, exists := new[assignedIDName]
   808  		idMatch := false
   809  		if exists {
   810  			idMatch = c.matchAssignedID(oldAssignedID, newAssignedID)
   811  		}
   812  		// assigned identity exists in the desired list too which means
   813  		// it should not be deleted
   814  		if exists && idMatch {
   815  			continue
   816  		}
   817  		// We are done checking that this old id is not present in the new
   818  		// list. So we will add it to the delete list.
   819  		delete[assignedIDName] = oldAssignedID
   820  	}
   821  	stats.Put(stats.FindAzureAssignedIdentitiesToDelete, time.Since(begin))
   822  	return delete, nil
   823  }
   824  
   825  // getAzureAssignedIdentitiesToUpdate returns a list of assignedIDs that need to be updated
   826  // because of change in azureIdentity or azurerIdentityBinding
   827  // returns 2 maps, first the assigned IDs currently on cluster, second the assignedID value to update with
   828  func (c *Client) getAzureAssignedIdentitiesToUpdate(add, del map[string]aadpodid.AzureAssignedIdentity) (map[string]aadpodid.AzureAssignedIdentity, map[string]aadpodid.AzureAssignedIdentity) {
   829  	beforeUpdate := make(map[string]aadpodid.AzureAssignedIdentity)
   830  	afterUpdate := make(map[string]aadpodid.AzureAssignedIdentity)
   831  	// no updates required as assigned identities will not be in both lists
   832  	if len(add) == 0 || len(del) == 0 {
   833  		return beforeUpdate, afterUpdate
   834  	}
   835  	for assignedIDName, addAssignedID := range add {
   836  		if delAssignedID, exists := del[assignedIDName]; exists {
   837  			objMeta := delAssignedID.ObjectMeta
   838  			// the label should always be the latest as the pod could have moved to a different node
   839  			// with the same assigned identity
   840  			objMeta.SetLabels(addAssignedID.GetObjectMeta().GetLabels())
   841  			addAssignedID.ObjectMeta = objMeta
   842  			// assigned identity exists in add and del list
   843  			// update the assigned identity to the latest
   844  			beforeUpdate[assignedIDName] = delAssignedID
   845  			afterUpdate[assignedIDName] = addAssignedID
   846  			// since this is part of update, remove the assignedID from the add and del list
   847  			delete(add, assignedIDName)
   848  			delete(del, assignedIDName)
   849  		}
   850  	}
   851  	return beforeUpdate, afterUpdate
   852  }
   853  
   854  func (c *Client) makeAssignedIDs(azID aadpodid.AzureIdentity, azBinding aadpodid.AzureIdentityBinding, podName, podNameSpace, nodeName string) (*aadpodid.AzureAssignedIdentity, error) {
   855  	binding := azBinding
   856  	id := azID
   857  
   858  	labels := make(map[string]string)
   859  	labels["nodename"] = nodeName
   860  
   861  	oMeta := v1.ObjectMeta{
   862  		Name:   c.getAssignedIDName(podName, podNameSpace, azID.Name),
   863  		Labels: labels,
   864  	}
   865  	assignedID := &aadpodid.AzureAssignedIdentity{
   866  		ObjectMeta: oMeta,
   867  		Spec: aadpodid.AzureAssignedIdentitySpec{
   868  			AzureIdentityRef: &id,
   869  			AzureBindingRef:  &binding,
   870  			Pod:              podName,
   871  			PodNamespace:     podNameSpace,
   872  			NodeName:         nodeName,
   873  		},
   874  		Status: aadpodid.AzureAssignedIdentityStatus{
   875  			AvailableReplicas: 1,
   876  		},
   877  	}
   878  	// if we are in namespaced mode (or az identity is namespaced)
   879  	if c.IsNamespaced || aadpodid.IsNamespacedIdentity(&id) {
   880  		assignedID.Namespace = azID.Namespace
   881  	} else {
   882  		// eventually this should be identity namespace
   883  		// but to maintain back compat we will use existing
   884  		// behavior
   885  		assignedID.Namespace = "default"
   886  	}
   887  
   888  	klog.V(6).Infof("binding - %+v identity - %+v", azBinding, azID)
   889  	klog.V(5).Infof("making assigned ID: %+v", assignedID)
   890  	return assignedID, nil
   891  }
   892  
   893  func (c *Client) createAssignedIdentity(assignedID *aadpodid.AzureAssignedIdentity) error {
   894  	err := c.CRDClient.CreateAssignedIdentity(assignedID)
   895  	if err != nil {
   896  		return err
   897  	}
   898  	return nil
   899  }
   900  
   901  func (c *Client) removeAssignedIdentity(assignedID *aadpodid.AzureAssignedIdentity) error {
   902  	err := c.CRDClient.RemoveAssignedIdentity(assignedID)
   903  	if err != nil {
   904  		return err
   905  	}
   906  	return nil
   907  }
   908  
   909  func (c *Client) updateAssignedIdentity(assignedID *aadpodid.AzureAssignedIdentity) error {
   910  	return c.CRDClient.UpdateAssignedIdentity(assignedID)
   911  }
   912  
   913  func (c *Client) appendToRemoveListForNode(resourceID, nodeName string, nodeMap map[string]trackUserAssignedMSIIds) {
   914  	if trackList, ok := nodeMap[nodeName]; ok {
   915  		trackList.removeUserAssignedMSIIDs = append(trackList.removeUserAssignedMSIIDs, resourceID)
   916  		nodeMap[nodeName] = trackList
   917  		return
   918  	}
   919  	nodeMap[nodeName] = trackUserAssignedMSIIds{removeUserAssignedMSIIDs: []string{resourceID}}
   920  }
   921  
   922  func (c *Client) appendToAddListForNode(resourceID, nodeName string, nodeMap map[string]trackUserAssignedMSIIds) {
   923  	if trackList, ok := nodeMap[nodeName]; ok {
   924  		trackList.addUserAssignedMSIIDs = append(trackList.addUserAssignedMSIIDs, resourceID)
   925  		nodeMap[nodeName] = trackList
   926  		return
   927  	}
   928  	nodeMap[nodeName] = trackUserAssignedMSIIds{addUserAssignedMSIIDs: []string{resourceID}}
   929  }
   930  
   931  func (c *Client) checkIfUserAssignedMSI(id aadpodid.AzureIdentity) bool {
   932  	return id.Spec.Type == aadpodid.UserAssignedMSI
   933  }
   934  
   935  func (c *Client) getAssignedIDName(podName, podNameSpace, idName string) string {
   936  	return podName + "-" + podNameSpace + "-" + idName
   937  }
   938  
   939  func (c *Client) checkIfMSIExistsOnNode(id *aadpodid.AzureIdentity, nodeName string, nodeMSIList []string) bool {
   940  	for _, userAssignedMSI := range nodeMSIList {
   941  		if strings.EqualFold(userAssignedMSI, id.Spec.ResourceID) {
   942  			return true
   943  		}
   944  	}
   945  	return false
   946  }
   947  
   948  func (c *Client) getUserMSIListForNode(nodeOrVMSSName string, isvmss bool) ([]string, error) {
   949  	return c.CloudClient.GetUserMSIs(nodeOrVMSSName, isvmss)
   950  }
   951  
   952  func getIDKey(ns, name string) string {
   953  	return strings.Join([]string{ns, name}, "/")
   954  }
   955  
   956  func (c *Client) convertIDListToMap(azureIdentities []aadpodid.AzureIdentity) (map[string]aadpodid.AzureIdentity, error) {
   957  	m := make(map[string]aadpodid.AzureIdentity, len(azureIdentities))
   958  	for _, azureIdentity := range azureIdentities {
   959  		// validate the resourceID in azure identity for type 0 (UserAssignedMSI) to ensure format is as expected
   960  		if c.checkIfUserAssignedMSI(azureIdentity) {
   961  			err := utils.ValidateResourceID(azureIdentity.Spec.ResourceID)
   962  			if err != nil {
   963  				klog.Errorf("ignoring azure identity %s/%s, error: %+v", azureIdentity.Namespace, azureIdentity.Name, err)
   964  				continue
   965  			}
   966  		}
   967  		m[getIDKey(azureIdentity.Namespace, azureIdentity.Name)] = azureIdentity
   968  	}
   969  	return m, nil
   970  }
   971  
   972  func (c *Client) checkIfInUse(checkAssignedID aadpodid.AzureAssignedIdentity, assignedIDMap map[string]aadpodid.AzureAssignedIdentity, vmssGroups *vmssGroupList) (bool, error) {
   973  	for _, assignedID := range assignedIDMap {
   974  		checkID := checkAssignedID.Spec.AzureIdentityRef
   975  		id := assignedID.Spec.AzureIdentityRef
   976  		// If they have the same client id, reside on the same node but the pod name is different, then the
   977  		// assigned id is in use.
   978  		// This is applicable only for user assigned MSI since that is node specific. Ignore other cases.
   979  		if checkID.Spec.Type != aadpodid.UserAssignedMSI {
   980  			continue
   981  		}
   982  
   983  		if checkAssignedID.Spec.Pod == assignedID.Spec.Pod {
   984  			// No need to do the rest of the checks in this case, since it's the same assignment
   985  			// The same identity won't be assigned to a pod twice, so it's the same reference.
   986  			continue
   987  		}
   988  
   989  		if checkID.Spec.ClientID != id.Spec.ClientID {
   990  			continue
   991  		}
   992  
   993  		if checkAssignedID.Spec.NodeName == assignedID.Spec.NodeName {
   994  			return true, nil
   995  		}
   996  
   997  		vmss, err := getVMSSGroupFromPossiblyUnreferencedNode(c.NodeClient, vmssGroups, checkAssignedID.Spec.NodeName)
   998  		if err != nil {
   999  			return false, err
  1000  		}
  1001  
  1002  		// check if this identity is used on another node in the same vmss
  1003  		// This check is needed because vmss identities currently operate on all nodes
  1004  		// in the vmss not just a single node.
  1005  		if vmss != nil && vmss.hasNode(assignedID.Spec.NodeName) {
  1006  			return true, nil
  1007  		}
  1008  	}
  1009  
  1010  	return false, nil
  1011  }
  1012  
  1013  func (c *Client) getUniqueIDs(idList []string) []string {
  1014  	idSet := make(map[string]struct{})
  1015  	var uniqueList []string
  1016  
  1017  	for _, id := range idList {
  1018  		idSet[id] = struct{}{}
  1019  	}
  1020  	for id := range idSet {
  1021  		uniqueList = append(uniqueList, id)
  1022  	}
  1023  	return uniqueList
  1024  }
  1025  
  1026  func (c *Client) updateAssignedIdentityStatus(assignedID *aadpodid.AzureAssignedIdentity, status string) error {
  1027  	return c.CRDClient.UpdateAzureAssignedIdentityStatus(assignedID, status)
  1028  }
  1029  
  1030  func (c *Client) updateNodeAndDeps(newAssignedIDs map[string]aadpodid.AzureAssignedIdentity, nodeMap map[string]trackUserAssignedMSIIds, nodeRefs map[string]bool, wg *sync.WaitGroup) {
  1031  	for nodeName, nodeTrackList := range nodeMap {
  1032  		wg.Add(1)
  1033  		go c.updateUserMSI(newAssignedIDs, nodeName, nodeTrackList, nodeRefs, wg)
  1034  	}
  1035  }
  1036  
  1037  func (c *Client) updateUserMSI(newAssignedIDs map[string]aadpodid.AzureAssignedIdentity, nodeOrVMSSName string, nodeTrackList trackUserAssignedMSIIds, nodeRefs map[string]bool, wg *sync.WaitGroup) {
  1038  	defer wg.Done()
  1039  	beginAdding := time.Now()
  1040  	klog.Infof("processing node %s, add [%d], del [%d], update [%d]", nodeOrVMSSName,
  1041  		len(nodeTrackList.assignedIDsToCreate), len(nodeTrackList.assignedIDsToDelete), len(nodeTrackList.assignedIDsToUpdate))
  1042  
  1043  	ctx := context.TODO()
  1044  	// We have to ensure that we don't overwhelm the API server with too many
  1045  	// requests in flight. We use a token based approach implemented using semaphore to
  1046  	// ensure that only given createDeleteBatch requests are in flight at any point in time.
  1047  	// Note that at this point in the code path, we are doing this in parallel per node/VMSS already.
  1048  	semCreateOrUpdate := semaphore.NewWeighted(c.createDeleteBatch)
  1049  
  1050  	for _, createID := range nodeTrackList.assignedIDsToCreate {
  1051  		if err := semCreateOrUpdate.Acquire(ctx, 1); err != nil {
  1052  			klog.Errorf("failed to acquire semaphore in the create loop, error: %+v", err)
  1053  			return
  1054  		}
  1055  		go func(assignedID aadpodid.AzureAssignedIdentity) {
  1056  			defer semCreateOrUpdate.Release(1)
  1057  			if assignedID.Status.Status == "" {
  1058  				binding := assignedID.Spec.AzureBindingRef
  1059  
  1060  				// this is the state when the azure assigned identity is yet to be created
  1061  				klog.V(5).Infof("initiating AzureAssignedIdentity creation for pod - %s, binding - %s", assignedID.Spec.Pod, binding.Name)
  1062  
  1063  				assignedID.Status.Status = aadpodid.AssignedIDCreated
  1064  				err := c.createAssignedIdentity(&assignedID)
  1065  				if err != nil {
  1066  					message := fmt.Sprintf("failed to create AzureAssignedIdentity %s/%s for pod %s/%s, error: %+v", assignedID.Name, assignedID.Namespace, assignedID.Spec.PodNamespace, assignedID.Spec.Pod, err)
  1067  					c.EventRecorder.Event(binding, corev1.EventTypeWarning, "binding apply error", message)
  1068  					klog.Error(message)
  1069  				}
  1070  			}
  1071  		}(createID)
  1072  	}
  1073  
  1074  	for _, updateID := range nodeTrackList.assignedIDsToUpdate {
  1075  		if err := semCreateOrUpdate.Acquire(ctx, 1); err != nil {
  1076  			klog.Errorf("failed to acquire semaphore in the update loop, error: %+v", err)
  1077  			return
  1078  		}
  1079  		go func(assignedID aadpodid.AzureAssignedIdentity) {
  1080  			defer semCreateOrUpdate.Release(1)
  1081  			if assignedID.Status.Status == "" {
  1082  				binding := assignedID.Spec.AzureBindingRef
  1083  
  1084  				// this is the state when the azure assigned identity is yet to be created
  1085  				klog.V(5).Infof("initiating assigned id creation for pod - %s, binding - %s", assignedID.Spec.Pod, binding.Name)
  1086  
  1087  				assignedID.Status.Status = aadpodid.AssignedIDCreated
  1088  				err := c.updateAssignedIdentity(&assignedID)
  1089  				if err != nil {
  1090  					message := fmt.Sprintf("failed to update AzureAssignedIdentity %s/%s for pod %s/%s, error: %+v", assignedID.Namespace, assignedID.Name, assignedID.Spec.Pod, assignedID.Spec.PodNamespace, err)
  1091  					c.EventRecorder.Event(binding, corev1.EventTypeWarning, "binding apply error", message)
  1092  					klog.Error(message)
  1093  				}
  1094  			}
  1095  		}(updateID)
  1096  	}
  1097  
  1098  	// Ensure that all creates are complete
  1099  	if err := semCreateOrUpdate.Acquire(ctx, c.createDeleteBatch); err != nil {
  1100  		klog.Errorf("failed to acquire semaphore at the end of creates, error: %+v", err)
  1101  		return
  1102  	}
  1103  	// generate unique list so we don't make multiple calls to assign/remove same id
  1104  	addUserAssignedMSIIDs := c.getUniqueIDs(nodeTrackList.addUserAssignedMSIIDs)
  1105  	removeUserAssignedMSIIDs := c.getUniqueIDs(nodeTrackList.removeUserAssignedMSIIDs)
  1106  	createOrUpdateList := append([]aadpodid.AzureAssignedIdentity{}, nodeTrackList.assignedIDsToCreate...)
  1107  	createOrUpdateList = append(createOrUpdateList, nodeTrackList.assignedIDsToUpdate...)
  1108  
  1109  	err := c.CloudClient.UpdateUserMSI(addUserAssignedMSIIDs, removeUserAssignedMSIIDs, nodeOrVMSSName, nodeTrackList.isvmss)
  1110  	if err != nil {
  1111  		klog.Errorf("failed to update user-assigned identities on node %s (add [%d], del [%d], update[%d]), error: %+v", nodeOrVMSSName, len(nodeTrackList.assignedIDsToCreate), len(nodeTrackList.assignedIDsToDelete), len(nodeTrackList.assignedIDsToUpdate), err)
  1112  		idList, getErr := c.getUserMSIListForNode(nodeOrVMSSName, nodeTrackList.isvmss)
  1113  		if getErr != nil {
  1114  			klog.Errorf("failed to get a list of user-assigned identites from node %s, error: %+v", nodeOrVMSSName, getErr)
  1115  			return
  1116  		}
  1117  
  1118  		for _, createID := range createOrUpdateList {
  1119  			createID := createID // avoid implicit memory aliasing in for loop
  1120  			id := createID.Spec.AzureIdentityRef
  1121  			binding := createID.Spec.AzureBindingRef
  1122  
  1123  			isUserAssignedMSI := c.checkIfUserAssignedMSI(*id)
  1124  			idExistsOnNode := c.checkIfMSIExistsOnNode(id, createID.Spec.NodeName, idList)
  1125  
  1126  			if isUserAssignedMSI && !idExistsOnNode {
  1127  				message := fmt.Sprintf("failed to apply binding %s/%s node %s for pod %s/%s, error: %+v", binding.Namespace, binding.Name, createID.Spec.NodeName, createID.Spec.PodNamespace, createID.Spec.Pod, err)
  1128  				c.EventRecorder.Event(binding, corev1.EventTypeWarning, "binding apply error", message)
  1129  				klog.Error(message)
  1130  				continue
  1131  			}
  1132  			// the identity was successfully assigned to the node
  1133  			c.EventRecorder.Event(binding, corev1.EventTypeNormal, "binding applied",
  1134  				fmt.Sprintf("binding %s applied on node %s for pod %s", binding.Name, createID.Spec.NodeName, createID.Name))
  1135  
  1136  			klog.Infof("identity %s/%s has successfully been assigned to node %s", id.Namespace, id.Name, createID.Spec.NodeName)
  1137  
  1138  			// Identity is successfully assigned to node, so update the status of assigned identity to assigned
  1139  			if updateErr := c.updateAssignedIdentityStatus(&createID, aadpodid.AssignedIDAssigned); updateErr != nil {
  1140  				message := fmt.Sprintf("failed to update AzureAssignedIdentity %s/%s status to %s for pod %s/%s, error: %+v", createID.Namespace, createID.Name, aadpodid.AssignedIDAssigned, createID.Spec.PodNamespace, createID.Spec.Pod, updateErr)
  1141  				c.EventRecorder.Event(&createID, corev1.EventTypeWarning, "status update error", message)
  1142  				klog.Error(message)
  1143  			}
  1144  
  1145  			isCreateOperation := false
  1146  			for _, i := range nodeTrackList.assignedIDsToCreate {
  1147  				if reflect.DeepEqual(createID, i) {
  1148  					isCreateOperation = true
  1149  					break
  1150  				}
  1151  			}
  1152  			if isCreateOperation {
  1153  				stats.Increment(stats.TotalAzureAssignedIdentitiesCreated, 1)
  1154  			} else {
  1155  				stats.Increment(stats.TotalAzureAssignedIdentitiesUpdated, 1)
  1156  			}
  1157  		}
  1158  
  1159  		for _, delID := range nodeTrackList.assignedIDsToDelete {
  1160  			delID := delID // avoid implicit memory aliasing in for loop
  1161  			id := delID.Spec.AzureIdentityRef
  1162  			removedBinding := delID.Spec.AzureBindingRef
  1163  			isUserAssignedMSI := c.checkIfUserAssignedMSI(*id)
  1164  			idExistsOnNode := c.checkIfMSIExistsOnNode(id, delID.Spec.NodeName, idList)
  1165  			vmssGroups, getErr := getVMSSGroups(c.NodeClient, nodeRefs)
  1166  			if getErr != nil {
  1167  				klog.Errorf("failed to get VMSS groups, error: %+v", getErr)
  1168  				continue
  1169  			}
  1170  			inUse, checkErr := c.checkIfInUse(delID, newAssignedIDs, vmssGroups)
  1171  			if checkErr != nil {
  1172  				klog.Errorf("failed to check if identity is in use, error: %+v", getErr)
  1173  				continue
  1174  			}
  1175  			// the identity still exists on node, which means removing the identity from the node failed
  1176  			if isUserAssignedMSI && !inUse && idExistsOnNode {
  1177  				klog.Errorf("failed to remove AzureIdentityBinding %s from node %s for pod %s/%s, error: %+v", removedBinding.Name, delID.Spec.NodeName, delID.Spec.PodNamespace, delID.Spec.Pod, err)
  1178  				continue
  1179  			}
  1180  
  1181  			klog.Infof("updating msis on node %s failed, but identity %s/%s has successfully been removed from node", delID.Spec.NodeName, id.Namespace, id.Name)
  1182  
  1183  			// remove assigned identity crd from cluster as the identity has successfully been removed from the node
  1184  			err = c.removeAssignedIdentity(&delID)
  1185  			if err != nil {
  1186  				klog.Errorf("failed to remove AzureAssignedIdentity %s, error: %+v", delID.Name, err)
  1187  				continue
  1188  			}
  1189  			klog.Infof("deleted assigned identity %s/%s", delID.Namespace, delID.Name)
  1190  			stats.Increment(stats.TotalAzureAssignedIdentitiesDeleted, 1)
  1191  		}
  1192  		stats.Put(stats.TotalAzureAssignedIdentitiesCreateOrUpdate, time.Since(beginAdding))
  1193  		return
  1194  	}
  1195  
  1196  	semUpdate := semaphore.NewWeighted(c.createDeleteBatch)
  1197  
  1198  	for _, createID := range createOrUpdateList {
  1199  		if err := semUpdate.Acquire(ctx, 1); err != nil {
  1200  			klog.Errorf("failed to acquire semaphore in the update loop, error: %+v", err)
  1201  			return
  1202  		}
  1203  		go func(assignedID aadpodid.AzureAssignedIdentity) {
  1204  			defer semUpdate.Release(1)
  1205  			binding := assignedID.Spec.AzureBindingRef
  1206  			// update the status to assigned for assigned identity as identity was successfully assigned to node.
  1207  			err := c.updateAssignedIdentityStatus(&assignedID, aadpodid.AssignedIDAssigned)
  1208  			if err != nil {
  1209  				message := fmt.Sprintf("failed to update AzureAssignedIdentity %s/%s status to %s for pod %s, error: %+v", assignedID.Namespace, assignedID.Name, aadpodid.AssignedIDAssigned, assignedID.Spec.Pod, err.Error())
  1210  				c.EventRecorder.Event(&assignedID, corev1.EventTypeWarning, "status update error", message)
  1211  				klog.Error(message)
  1212  				return
  1213  			}
  1214  			c.EventRecorder.Event(binding, corev1.EventTypeNormal, "binding applied",
  1215  				fmt.Sprintf("Binding %s applied on node %s for pod %s", binding.Name, assignedID.Spec.NodeName, assignedID.Name))
  1216  		}(createID)
  1217  	}
  1218  
  1219  	// Ensure that all updates are complete
  1220  	if err := semUpdate.Acquire(ctx, c.createDeleteBatch); err != nil {
  1221  		klog.Errorf("failed to acquire semaphore at the end of updates, error: %+v", err)
  1222  		return
  1223  	}
  1224  
  1225  	semDel := semaphore.NewWeighted(c.createDeleteBatch)
  1226  
  1227  	for _, delID := range nodeTrackList.assignedIDsToDelete {
  1228  		if err := semDel.Acquire(ctx, 1); err != nil {
  1229  			klog.Errorf("failed to acquire semaphore in the delete loop, error: %+v", err)
  1230  			return
  1231  		}
  1232  		go func(assignedID aadpodid.AzureAssignedIdentity) {
  1233  			defer semDel.Release(1)
  1234  			// update the status for the assigned identity to Unassigned as the identity has been successfully removed from node.
  1235  			// this will ensure on next sync loop we only try to delete the assigned identity instead of doing everything.
  1236  			err := c.updateAssignedIdentityStatus(&assignedID, aadpodid.AssignedIDUnAssigned)
  1237  			if err != nil {
  1238  				message := fmt.Sprintf("failed to update AzureAssignedIdentity %s/%s status to %s for pod %s/%s, error: %+v", assignedID.Namespace, assignedID.Name, aadpodid.AssignedIDUnAssigned, assignedID.Spec.PodNamespace, assignedID.Spec.Pod, err)
  1239  				c.EventRecorder.Event(&assignedID, corev1.EventTypeWarning, "status update error", message)
  1240  				klog.Error(message)
  1241  				return
  1242  			}
  1243  			// remove assigned identity crd from cluster as the identity has successfully been removed from the node
  1244  			err = c.removeAssignedIdentity(&assignedID)
  1245  			if err != nil {
  1246  				klog.Errorf("failed to remove AzureAssignedIdentity %s/%s, error: %+v", assignedID.Namespace, assignedID.Name, err)
  1247  				return
  1248  			}
  1249  			klog.V(1).Infof("deleted assigned identity %s/%s", assignedID.Namespace, assignedID.Name)
  1250  		}(delID)
  1251  	}
  1252  
  1253  	// Ensure that all deletes are complete
  1254  	if err := semDel.Acquire(ctx, c.createDeleteBatch); err != nil {
  1255  		klog.Errorf("failed to acquire semaphore at the end of deletes, error: %+v", err)
  1256  		return
  1257  	}
  1258  
  1259  	stats.Increment(stats.TotalAzureAssignedIdentitiesCreated, len(nodeTrackList.assignedIDsToCreate))
  1260  	stats.Increment(stats.TotalAzureAssignedIdentitiesUpdated, len(nodeTrackList.assignedIDsToUpdate))
  1261  	stats.Increment(stats.TotalAzureAssignedIdentitiesDeleted, len(nodeTrackList.assignedIDsToDelete))
  1262  	stats.Put(stats.TotalAzureAssignedIdentitiesCreateOrUpdate, time.Since(beginAdding))
  1263  }
  1264  
  1265  // cleanUpAllAssignedIdentitiesOnNode deletes all assigned identities associated with a the node
  1266  func (c *Client) cleanUpAllAssignedIdentitiesOnNode(node string, nodeTrackList trackUserAssignedMSIIds, wg *sync.WaitGroup) {
  1267  	defer wg.Done()
  1268  	klog.Infof("deleting all assigned identites for %s as node not found", node)
  1269  	for _, deleteID := range nodeTrackList.assignedIDsToDelete {
  1270  		deleteID := deleteID // avoid implicit memory aliasing in for loop
  1271  		binding := deleteID.Spec.AzureBindingRef
  1272  
  1273  		err := c.removeAssignedIdentity(&deleteID)
  1274  		if err != nil {
  1275  			message := fmt.Sprintf("failed to remove AzureIdentityBinding %s/%s from node %s for pod %s/%s, error: %v", binding.Namespace, binding.Name, deleteID.Spec.NodeName, deleteID.Spec.PodNamespace, deleteID.Spec.Pod, err)
  1276  			c.EventRecorder.Event(binding, corev1.EventTypeWarning, "binding remove error", message)
  1277  			klog.Error(message)
  1278  			continue
  1279  		}
  1280  		c.EventRecorder.Event(binding, corev1.EventTypeNormal, "binding removed",
  1281  			fmt.Sprintf("Binding %s removed from node %s for pod %s", binding.Name, deleteID.Spec.NodeName, deleteID.Spec.Pod))
  1282  	}
  1283  }
  1284  
  1285  // consolidateVMSSNodes takes a list of all nodes that are part of the current sync cycle, checks if the nodes are
  1286  // part of vmss and combines the vmss nodes into vmss name. This consolidation is needed because vmss identities
  1287  // currently operate on all nodes in the vmss not just a single node.
  1288  func (c *Client) consolidateVMSSNodes(nodeMap map[string]trackUserAssignedMSIIds, wg *sync.WaitGroup) {
  1289  	vmssMap := make(map[string][]string)
  1290  
  1291  	for nodeName, nodeTrackList := range nodeMap {
  1292  		node, err := c.NodeClient.Get(nodeName)
  1293  		if err != nil && !strings.Contains(err.Error(), "not found") {
  1294  			klog.Errorf("failed to get node %s, error: %+v", nodeName, err)
  1295  			continue
  1296  		}
  1297  		if err != nil && strings.Contains(err.Error(), "not found") {
  1298  			klog.Warningf("failed to get node %s while updating user-assigned identities, error: %+v", nodeName, err)
  1299  			wg.Add(1)
  1300  			// node is no longer found in the cluster, all the assigned identities that were created in this sync loop
  1301  			// and those that already exist for this node need to be deleted.
  1302  			go c.cleanUpAllAssignedIdentitiesOnNode(nodeName, nodeTrackList, wg)
  1303  			delete(nodeMap, nodeName)
  1304  			continue
  1305  		}
  1306  		vmssName, isvmss, err := isVMSS(node)
  1307  		if err != nil {
  1308  			klog.Errorf("failed to check if node %s is VMSS, error: %+v", nodeName, err)
  1309  			continue
  1310  		}
  1311  		if isvmss {
  1312  			if nodes, ok := vmssMap[vmssName]; ok {
  1313  				nodes = append(nodes, nodeName)
  1314  				vmssMap[vmssName] = nodes
  1315  				continue
  1316  			}
  1317  			vmssMap[vmssName] = []string{nodeName}
  1318  		}
  1319  	}
  1320  
  1321  	// aggregate vmss nodes into vmss name
  1322  	for vmssName, vmssNodes := range vmssMap {
  1323  		if len(vmssNodes) < 1 {
  1324  			continue
  1325  		}
  1326  
  1327  		vmssTrackList := trackUserAssignedMSIIds{}
  1328  		for _, vmssNode := range vmssNodes {
  1329  			vmssTrackList.addUserAssignedMSIIDs = append(vmssTrackList.addUserAssignedMSIIDs, nodeMap[vmssNode].addUserAssignedMSIIDs...)
  1330  			vmssTrackList.removeUserAssignedMSIIDs = append(vmssTrackList.removeUserAssignedMSIIDs, nodeMap[vmssNode].removeUserAssignedMSIIDs...)
  1331  			vmssTrackList.assignedIDsToCreate = append(vmssTrackList.assignedIDsToCreate, nodeMap[vmssNode].assignedIDsToCreate...)
  1332  			vmssTrackList.assignedIDsToDelete = append(vmssTrackList.assignedIDsToDelete, nodeMap[vmssNode].assignedIDsToDelete...)
  1333  			vmssTrackList.assignedIDsToUpdate = append(vmssTrackList.assignedIDsToUpdate, nodeMap[vmssNode].assignedIDsToUpdate...)
  1334  			vmssTrackList.isvmss = true
  1335  
  1336  			delete(nodeMap, vmssNode)
  1337  			nodeMap[getVMSSName(vmssName)] = vmssTrackList
  1338  		}
  1339  	}
  1340  }
  1341  
  1342  // checkIfIdentityImmutable checks if the identity is immutable
  1343  // if identity is immutable, then it will not be removed from underlying node/vmss
  1344  // returns true if identity is immutable
  1345  func (c *Client) checkIfIdentityImmutable(id string) bool {
  1346  	// no immutable identity list defined, then identity is not immutable and can be safely removed
  1347  	if c.ImmutableUserMSIsMap == nil {
  1348  		return false
  1349  	}
  1350  	// identity is immutable, so should not be deleted from the underlying node/vmss
  1351  	if _, exists := c.ImmutableUserMSIsMap[id]; exists {
  1352  		return true
  1353  	}
  1354  	return false
  1355  }
  1356  
  1357  // generateIdentityAssignmentState generates the current and desired state of each node's identity
  1358  // assignments based on an existing list of AzureAssignedIdentity as the source of truth.
  1359  func (c *Client) generateIdentityAssignmentState() (map[string]map[string]bool, map[string]map[string]bool, map[string]bool, error) {
  1360  	type nodeMetadata struct {
  1361  		nodeName string
  1362  		isVMSS   bool
  1363  	}
  1364  
  1365  	assignedIDs, err := c.CRDClient.ListAssignedIDs()
  1366  	if err != nil {
  1367  		return nil, nil, nil, fmt.Errorf("failed to list AzureAssignedIdentities, error: %+v", err)
  1368  	}
  1369  
  1370  	nodeMetadataCache := make(map[string]nodeMetadata)
  1371  	isVMSSMap := make(map[string]bool)
  1372  	currentState := make(map[string]map[string]bool)
  1373  	desiredState := make(map[string]map[string]bool)
  1374  	for _, assignedID := range *assignedIDs {
  1375  		if _, ok := nodeMetadataCache[assignedID.Spec.NodeName]; !ok {
  1376  			node, err := c.NodeClient.Get(assignedID.Spec.NodeName)
  1377  			if err != nil {
  1378  				return nil, nil, nil, fmt.Errorf("failed to get node %s, error: %+v", assignedID.Spec.NodeName, err)
  1379  			}
  1380  
  1381  			nodeName, isVMSS, err := isVMSS(node)
  1382  			if err != nil {
  1383  				return nil, nil, nil, fmt.Errorf("failed to check if node %s is VMSS, error: %+v", assignedID.Spec.NodeName, err)
  1384  			} else if isVMSS {
  1385  				nodeName = getVMSSName(nodeName)
  1386  			} else {
  1387  				// VM node name does not require conversion
  1388  				nodeName = assignedID.Spec.NodeName
  1389  			}
  1390  
  1391  			// cache node metadata to avoid excessive GET calls
  1392  			nodeMetadataCache[assignedID.Spec.NodeName] = nodeMetadata{
  1393  				nodeName: nodeName,
  1394  				isVMSS:   isVMSS,
  1395  			}
  1396  		}
  1397  
  1398  		nodeName := nodeMetadataCache[assignedID.Spec.NodeName].nodeName
  1399  		isVMSS := nodeMetadataCache[assignedID.Spec.NodeName].isVMSS
  1400  		isVMSSMap[nodeName] = isVMSS
  1401  
  1402  		// only consider AzureAssignedIdentities in ASSIGNED state
  1403  		// do not consider AzureAssignedIdentities in CREATED state because they are either:
  1404  		// 1. in the process of assigning the identities on Azure or
  1405  		// 2. encountering errors when assigning identities on Azure
  1406  		if assignedID.Status.Status == aadpodid.AssignedIDAssigned && assignedID.Spec.AzureIdentityRef.Spec.Type == aadpodid.UserAssignedMSI {
  1407  			if _, ok := desiredState[nodeName]; !ok {
  1408  				desiredState[nodeName] = make(map[string]bool)
  1409  			}
  1410  			desiredState[nodeName][assignedID.Spec.AzureIdentityRef.Spec.ResourceID] = true
  1411  		}
  1412  
  1413  		if _, ok := currentState[nodeName]; !ok {
  1414  			currentState[nodeName] = make(map[string]bool)
  1415  			idList, err := c.getUserMSIListForNode(nodeName, isVMSS)
  1416  			if err != nil {
  1417  				return nil, nil, nil, fmt.Errorf("failed to get a list of user-assigned identites from node %s, error: %+v", nodeName, err)
  1418  			}
  1419  
  1420  			for _, identityResourceID := range idList {
  1421  				currentState[nodeName][identityResourceID] = true
  1422  			}
  1423  		}
  1424  	}
  1425  
  1426  	return currentState, desiredState, isVMSSMap, nil
  1427  }
  1428  
  1429  // generateIdentityAssignmentDiff perform a diff between current
  1430  // and desired state of identity assignment on Azure and returns
  1431  // a map with the node name as the key and a list of user-assigned
  1432  // identities we should assign to the node as the value.
  1433  func generateIdentityAssignmentDiff(currentState map[string]map[string]bool, desiredState map[string]map[string]bool) map[string][]string {
  1434  	diff := make(map[string][]string)
  1435  	for nodeName, identityResourceIDs := range desiredState {
  1436  		var identitiesToAssign []string
  1437  		for identityResourceID := range identityResourceIDs {
  1438  			if _, ok := currentState[nodeName]; ok && currentState[nodeName][identityResourceID] {
  1439  				continue
  1440  			}
  1441  			identitiesToAssign = append(identitiesToAssign, identityResourceID)
  1442  		}
  1443  
  1444  		if len(identitiesToAssign) > 0 {
  1445  			diff[nodeName] = identitiesToAssign
  1446  		}
  1447  	}
  1448  
  1449  	return diff
  1450  }
  1451  
  1452  // reconcileIdentityAssignment uses the existing list of AzureAssignedIdentities
  1453  // as the single source of truth and reconciles identity assignment on Azure.
  1454  func (c *Client) reconcileIdentityAssignment() {
  1455  	currentState, desiredState, isVMSSMap, err := c.generateIdentityAssignmentState()
  1456  	if err != nil {
  1457  		klog.Errorf("failed to generate identity assignment state, error: %+v", err)
  1458  		return
  1459  	}
  1460  
  1461  	klog.V(6).Infof("current state of identity assignment on Azure: %+v", currentState)
  1462  	klog.V(6).Infof("desired state of identity assignment on Azure: %+v", desiredState)
  1463  
  1464  	diff := generateIdentityAssignmentDiff(currentState, desiredState)
  1465  	for nodeNameOnAzure, identitiesToAssign := range diff {
  1466  		klog.Infof("reconciling identity assignment for %v on node %s", identitiesToAssign, nodeNameOnAzure)
  1467  		if err := c.CloudClient.UpdateUserMSI(identitiesToAssign, nil, nodeNameOnAzure, isVMSSMap[nodeNameOnAzure]); err != nil {
  1468  			klog.Errorf("failed to update user-assigned identities on node %s, error: %+v", nodeNameOnAzure, err)
  1469  		}
  1470  	}
  1471  }