github.com/argoproj/argo-cd@v1.8.7/controller/cache/cache.go

package cache

import (
	"context"
	"fmt"
	"reflect"
	"sync"

	clustercache "github.com/argoproj/gitops-engine/pkg/cache"
	"github.com/argoproj/gitops-engine/pkg/health"
	"github.com/argoproj/gitops-engine/pkg/utils/kube"
	log "github.com/sirupsen/logrus"
	"golang.org/x/sync/semaphore"
	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
	"k8s.io/apimachinery/pkg/runtime/schema"
	"k8s.io/apimachinery/pkg/watch"
	"k8s.io/client-go/tools/cache"

	"github.com/argoproj/argo-cd/common"
	"github.com/argoproj/argo-cd/controller/metrics"
	appv1 "github.com/argoproj/argo-cd/pkg/apis/application/v1alpha1"
	"github.com/argoproj/argo-cd/util/argo"
	"github.com/argoproj/argo-cd/util/db"
	logutils "github.com/argoproj/argo-cd/util/log"
	"github.com/argoproj/argo-cd/util/lua"
	"github.com/argoproj/argo-cd/util/settings"
)

// LiveStateCache is a cache of the live state of resources in the clusters managed by the controller.
type LiveStateCache interface {
	// Returns the Kubernetes server version and API groups of the given cluster
	GetVersionsInfo(serverURL string) (string, []metav1.APIGroup, error)
	// Returns true if the given group/kind is a namespaced resource
	IsNamespaced(server string, gk schema.GroupKind) (bool, error)
	// Returns the synced cluster cache
	GetClusterCache(server string) (clustercache.ClusterCache, error)
	// Executes the given callback against the resource specified by the key and all of its children
	IterateHierarchy(server string, key kube.ResourceKey, action func(child appv1.ResourceNode, appName string)) error
	// Returns the state of the live nodes that correspond to the target nodes of the specified application
	GetManagedLiveObjs(a *appv1.Application, targetObjs []*unstructured.Unstructured) (map[kube.ResourceKey]*unstructured.Unstructured, error)
	// Returns all top-level resources (resources without owner references) of the specified namespace
	GetNamespaceTopLevelResources(server string, namespace string) (map[kube.ResourceKey]appv1.ResourceNode, error)
	// Starts watching resources of each controlled cluster
	Run(ctx context.Context) error
	// Returns information about monitored clusters
	GetClustersInfo() []clustercache.ClusterInfo
	// Init must be executed before the cache can be used
	Init() error
}
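
// A minimal usage sketch (illustrative only; names such as stateCache and ctx
// are hypothetical): callers are expected to Init the cache, start Run in the
// background, and then query it.
//
//	stateCache := NewLiveStateCache(db, appInformer, settingsMgr, kubectl, metricsServer, onObjectUpdated, nil)
//	if err := stateCache.Init(); err != nil {
//		log.Fatal(err)
//	}
//	go func() { _ = stateCache.Run(ctx) }() // blocks until ctx is cancelled
//	version, apiGroups, err := stateCache.GetVersionsInfo("https://kubernetes.default.svc")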

type ObjectUpdatedHandler = func(managedByApp map[string]bool, ref v1.ObjectReference)

type ResourceInfo struct {
	Info    []appv1.InfoItem
	AppName string
	// NetworkingInfo is available only for the known types involved in networking: Ingress, Service, Pod
	NetworkingInfo *appv1.ResourceNetworkingInfo
	Images         []string
	Health         *health.HealthStatus
}

func NewLiveStateCache(
	db db.ArgoDB,
	appInformer cache.SharedIndexInformer,
	settingsMgr *settings.SettingsManager,
	kubectl kube.Kubectl,
	metricsServer *metrics.MetricsServer,
	onObjectUpdated ObjectUpdatedHandler,
	clusterFilter func(cluster *appv1.Cluster) bool) LiveStateCache {

	return &liveStateCache{
		appInformer:     appInformer,
		db:              db,
		clusters:        make(map[string]clustercache.ClusterCache),
		onObjectUpdated: onObjectUpdated,
		kubectl:         kubectl,
		settingsMgr:     settingsMgr,
		metricsServer:   metricsServer,
		// The default limit of 50 is chosen based on experiments.
		listSemaphore: semaphore.NewWeighted(50),
		clusterFilter: clusterFilter,
	}
}

type cacheSettings struct {
	clusterSettings     clustercache.Settings
	appInstanceLabelKey string
}

type liveStateCache struct {
	db              db.ArgoDB
	appInformer     cache.SharedIndexInformer
	onObjectUpdated ObjectUpdatedHandler
	kubectl         kube.Kubectl
	settingsMgr     *settings.SettingsManager
	metricsServer   *metrics.MetricsServer
	clusterFilter   func(cluster *appv1.Cluster) bool

	// listSemaphore limits the number of concurrent, memory-consuming operations on the
	// results of k8s list queries across all clusters, to avoid memory spikes during cache initialization.
	listSemaphore *semaphore.Weighted

	clusters      map[string]clustercache.ClusterCache
	cacheSettings cacheSettings
	lock          sync.RWMutex
}
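
// The listSemaphore pattern, as a minimal self-contained sketch (the limit of
// 50 mirrors the default above; ctx and the guarded work are hypothetical):
// each memory-heavy list operation holds one unit, so at most 50 run
// concurrently across all clusters.
//
//	sem := semaphore.NewWeighted(50)
//	if err := sem.Acquire(ctx, 1); err != nil {
//		return err // ctx was cancelled while waiting for a slot
//	}
//	defer sem.Release(1)
//	// ... process one cluster's list results ...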

func (c *liveStateCache) loadCacheSettings() (*cacheSettings, error) {
	appInstanceLabelKey, err := c.settingsMgr.GetAppInstanceLabelKey()
	if err != nil {
		return nil, err
	}
	resourcesFilter, err := c.settingsMgr.GetResourcesFilter()
	if err != nil {
		return nil, err
	}
	resourceOverrides, err := c.settingsMgr.GetResourceOverrides()
	if err != nil {
		return nil, err
	}
	clusterSettings := clustercache.Settings{
		ResourceHealthOverride: lua.ResourceHealthOverrides(resourceOverrides),
		ResourcesFilter:        resourcesFilter,
	}
	return &cacheSettings{clusterSettings, appInstanceLabelKey}, nil
}

func asResourceNode(r *clustercache.Resource) appv1.ResourceNode {
	gv, err := schema.ParseGroupVersion(r.Ref.APIVersion)
	if err != nil {
		gv = schema.GroupVersion{}
	}
	parentRefs := make([]appv1.ResourceRef, len(r.OwnerRefs))
	for i, ownerRef := range r.OwnerRefs {
		ownerGvk := schema.FromAPIVersionAndKind(ownerRef.APIVersion, ownerRef.Kind)
		ownerKey := kube.NewResourceKey(ownerGvk.Group, ownerRef.Kind, r.Ref.Namespace, ownerRef.Name)
		parentRefs[i] = appv1.ResourceRef{Name: ownerRef.Name, Kind: ownerKey.Kind, Namespace: r.Ref.Namespace, Group: ownerKey.Group, UID: string(ownerRef.UID)}
	}
	var resHealth *appv1.HealthStatus
	resourceInfo := resInfo(r)
	if resourceInfo.Health != nil {
		resHealth = &appv1.HealthStatus{Status: resourceInfo.Health.Status, Message: resourceInfo.Health.Message}
	}
	return appv1.ResourceNode{
		ResourceRef: appv1.ResourceRef{
			UID:       string(r.Ref.UID),
			Name:      r.Ref.Name,
			Group:     gv.Group,
			Version:   gv.Version,
			Kind:      r.Ref.Kind,
			Namespace: r.Ref.Namespace,
		},
		ParentRefs:      parentRefs,
		Info:            resourceInfo.Info,
		ResourceVersion: r.ResourceVersion,
		NetworkingInfo:  resourceInfo.NetworkingInfo,
		Images:          resourceInfo.Images,
		Health:          resHealth,
		CreatedAt:       r.CreationTimestamp,
	}
}
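
// A sketch of the conversion above with hypothetical values: a Pod owned by a
// ReplicaSet becomes a ResourceNode whose ParentRefs entry points at that
// ReplicaSet in the same namespace.
//
//	res := &clustercache.Resource{
//		Ref:       v1.ObjectReference{APIVersion: "v1", Kind: "Pod", Name: "web-abc12", Namespace: "default"},
//		OwnerRefs: []metav1.OwnerReference{{APIVersion: "apps/v1", Kind: "ReplicaSet", Name: "web"}},
//	}
//	node := asResourceNode(res)
//	// node.ParentRefs[0] == appv1.ResourceRef{Group: "apps", Kind: "ReplicaSet", Name: "web", Namespace: "default"}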

func resInfo(r *clustercache.Resource) *ResourceInfo {
	info, ok := r.Info.(*ResourceInfo)
	if !ok || info == nil {
		info = &ResourceInfo{}
	}
	return info
}

func isRootAppNode(r *clustercache.Resource) bool {
	return resInfo(r).AppName != "" && len(r.OwnerRefs) == 0
}

func getApp(r *clustercache.Resource, ns map[kube.ResourceKey]*clustercache.Resource) string {
	return getAppRecursive(r, ns, map[kube.ResourceKey]bool{})
}

func ownerRefGV(ownerRef metav1.OwnerReference) schema.GroupVersion {
	gv, err := schema.ParseGroupVersion(ownerRef.APIVersion)
	if err != nil {
		gv = schema.GroupVersion{}
	}
	return gv
}

func getAppRecursive(r *clustercache.Resource, ns map[kube.ResourceKey]*clustercache.Resource, visited map[kube.ResourceKey]bool) string {
	if !visited[r.ResourceKey()] {
		visited[r.ResourceKey()] = true
	} else {
		log.Warnf("Circular dependency detected: %v.", visited)
		return resInfo(r).AppName
	}

	if resInfo(r).AppName != "" {
		return resInfo(r).AppName
	}
	for _, ownerRef := range r.OwnerRefs {
		gv := ownerRefGV(ownerRef)
		if parent, ok := ns[kube.NewResourceKey(gv.Group, ownerRef.Kind, r.Ref.Namespace, ownerRef.Name)]; ok {
			app := getAppRecursive(parent, ns, visited)
			if app != "" {
				return app
			}
		}
	}
	return ""
}
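
// A sketch of the traversal (hypothetical resources): a Pod owned by a
// ReplicaSet that is in turn owned by a Deployment carrying the app instance
// label resolves to that Deployment's app name.
//
//	// deployment's ResourceInfo has AppName "guestbook"; replicaSet and pod have none
//	// ns maps the namespace's resource keys to their cached resources
//	appName := getApp(pod, ns) // walks pod -> replicaSet -> deployment
//	// appName == "guestbook"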

var (
	ignoredRefreshResources = map[string]bool{
		"/" + kube.EndpointsKind: true,
	}
)

// skipAppRequeuing checks if the object is an API type for which we want to skip requeuing.
// We ignore API types that have a high churn rate and/or whose updates are irrelevant to the app.
func skipAppRequeuing(key kube.ResourceKey) bool {
	return ignoredRefreshResources[key.Group+"/"+key.Kind]
}
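
// For example (sketch): Endpoints live in the core ("") API group, so their
// key renders as "/Endpoints" and matches the map above, while a Deployment
// does not.
//
//	skipAppRequeuing(kube.NewResourceKey("", kube.EndpointsKind, "default", "my-svc")) // true
//	skipAppRequeuing(kube.NewResourceKey("apps", "Deployment", "default", "web"))     // false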

func (c *liveStateCache) getCluster(server string) (clustercache.ClusterCache, error) {
	c.lock.RLock()
	clusterCache, ok := c.clusters[server]
	cacheSettings := c.cacheSettings
	c.lock.RUnlock()

	if ok {
		return clusterCache, nil
	}

	c.lock.Lock()
	defer c.lock.Unlock()

	clusterCache, ok = c.clusters[server]
	if ok {
		return clusterCache, nil
	}

	cluster, err := c.db.GetCluster(context.Background(), server)
	if err != nil {
		return nil, err
	}

	if !c.canHandleCluster(cluster) {
		return nil, fmt.Errorf("controller is configured to ignore cluster %s", cluster.Server)
	}

	clusterCache = clustercache.NewClusterCache(cluster.RESTConfig(),
		clustercache.SetListSemaphore(c.listSemaphore),
		clustercache.SetResyncTimeout(common.K8SClusterResyncDuration),
		clustercache.SetSettings(cacheSettings.clusterSettings),
		clustercache.SetNamespaces(cluster.Namespaces),
		clustercache.SetPopulateResourceInfoHandler(func(un *unstructured.Unstructured, isRoot bool) (interface{}, bool) {
			res := &ResourceInfo{}
			populateNodeInfo(un, res)
			res.Health, _ = health.GetResourceHealth(un, cacheSettings.clusterSettings.ResourceHealthOverride)
			appName := kube.GetAppInstanceLabel(un, cacheSettings.appInstanceLabelKey)
			if isRoot && appName != "" {
				res.AppName = appName
			}

			// Edge case: we do not label CRDs, so they miss the tracking label we inject. But we still
			// want the full resource to be available in our cache (for diffing), so we store all CRDs.
			return res, res.AppName != "" || un.GroupVersionKind().Kind == kube.CustomResourceDefinitionKind
		}),
		clustercache.SetLogr(logutils.NewLogrusLogger(log.WithField("server", cluster.Server))),
	)

	_ = clusterCache.OnResourceUpdated(func(newRes *clustercache.Resource, oldRes *clustercache.Resource, namespaceResources map[kube.ResourceKey]*clustercache.Resource) {
		toNotify := make(map[string]bool)
		var ref v1.ObjectReference
		if newRes != nil {
			ref = newRes.Ref
		} else {
			ref = oldRes.Ref
		}
		for _, r := range []*clustercache.Resource{newRes, oldRes} {
			if r == nil {
				continue
			}
			app := getApp(r, namespaceResources)
			if app == "" || skipAppRequeuing(r.ResourceKey()) {
				continue
			}
			toNotify[app] = isRootAppNode(r) || toNotify[app]
		}
		c.onObjectUpdated(toNotify, ref)
	})

	_ = clusterCache.OnEvent(func(event watch.EventType, un *unstructured.Unstructured) {
		gvk := un.GroupVersionKind()
		c.metricsServer.IncClusterEventsCount(cluster.Server, gvk.Group, gvk.Kind)
	})

	c.clusters[server] = clusterCache

	return clusterCache, nil
}
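
// The populate handler above decides whether a resource's full manifest is
// kept in the cache. Restated as a hypothetical helper (not used by this
// file): manifests are retained only for app-labeled roots and for CRDs.
//
//	func shouldCacheManifest(res *ResourceInfo, un *unstructured.Unstructured) bool {
//		return res.AppName != "" || un.GroupVersionKind().Kind == kube.CustomResourceDefinitionKind
//	}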

func (c *liveStateCache) getSyncedCluster(server string) (clustercache.ClusterCache, error) {
	clusterCache, err := c.getCluster(server)
	if err != nil {
		return nil, err
	}
	err = clusterCache.EnsureSynced()
	if err != nil {
		return nil, err
	}
	return clusterCache, nil
}

func (c *liveStateCache) invalidate(cacheSettings cacheSettings) {
	log.Info("invalidating live state cache")
	c.lock.Lock()
	defer c.lock.Unlock()

	c.cacheSettings = cacheSettings
	for _, clust := range c.clusters {
		clust.Invalidate(clustercache.SetSettings(cacheSettings.clusterSettings))
	}
	log.Info("live state cache invalidated")
}

func (c *liveStateCache) IsNamespaced(server string, gk schema.GroupKind) (bool, error) {
	clusterInfo, err := c.getSyncedCluster(server)
	if err != nil {
		return false, err
	}
	return clusterInfo.IsNamespaced(gk)
}

func (c *liveStateCache) IterateHierarchy(server string, key kube.ResourceKey, action func(child appv1.ResourceNode, appName string)) error {
	clusterInfo, err := c.getSyncedCluster(server)
	if err != nil {
		return err
	}
	clusterInfo.IterateHierarchy(key, func(resource *clustercache.Resource, namespaceResources map[kube.ResourceKey]*clustercache.Resource) {
		action(asResourceNode(resource), getApp(resource, namespaceResources))
	})
	return nil
}

func (c *liveStateCache) GetNamespaceTopLevelResources(server string, namespace string) (map[kube.ResourceKey]appv1.ResourceNode, error) {
	clusterInfo, err := c.getSyncedCluster(server)
	if err != nil {
		return nil, err
	}
	resources := clusterInfo.GetNamespaceTopLevelResources(namespace)
	res := make(map[kube.ResourceKey]appv1.ResourceNode)
	for k, r := range resources {
		res[k] = asResourceNode(r)
	}
	return res, nil
}

func (c *liveStateCache) GetManagedLiveObjs(a *appv1.Application, targetObjs []*unstructured.Unstructured) (map[kube.ResourceKey]*unstructured.Unstructured, error) {
	clusterInfo, err := c.getSyncedCluster(a.Spec.Destination.Server)
	if err != nil {
		return nil, err
	}
	return clusterInfo.GetManagedLiveObjs(targetObjs, func(r *clustercache.Resource) bool {
		return resInfo(r).AppName == a.Name
	})
}
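
// A minimal usage sketch (stateCache, app, and targetObjs are hypothetical):
// the caller passes the objects rendered from git and receives their live
// counterparts, filtered to the resources tracked for that application.
//
//	liveObjs, err := stateCache.GetManagedLiveObjs(app, targetObjs)
//	if err != nil {
//		return err
//	}
//	for key, liveObj := range liveObjs {
//		// diff liveObj against the target object with the same key
//		_, _ = key, liveObj
//	}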

func (c *liveStateCache) GetVersionsInfo(serverURL string) (string, []metav1.APIGroup, error) {
	clusterInfo, err := c.getSyncedCluster(serverURL)
	if err != nil {
		return "", nil, err
	}
	return clusterInfo.GetServerVersion(), clusterInfo.GetAPIGroups(), nil
}

func (c *liveStateCache) clusterHasApps(apps []interface{}, cluster *appv1.Cluster) bool {
	for _, obj := range apps {
		app, ok := obj.(*appv1.Application)
		if !ok {
			continue
		}
		err := argo.ValidateDestination(context.Background(), &app.Spec.Destination, c.db)
		if err != nil {
			continue
		}
		if app.Spec.Destination.Server == cluster.Server {
			return true
		}
	}
	return false
}

func (c *liveStateCache) watchSettings(ctx context.Context) {
	updateCh := make(chan *settings.ArgoCDSettings, 1)
	c.settingsMgr.Subscribe(updateCh)

	done := false
	for !done {
		select {
		case <-updateCh:
			nextCacheSettings, err := c.loadCacheSettings()
			if err != nil {
				log.Warnf("Failed to read updated settings: %v", err)
				continue
			}

			c.lock.Lock()
			needInvalidate := false
			if !reflect.DeepEqual(c.cacheSettings, *nextCacheSettings) {
				c.cacheSettings = *nextCacheSettings
				needInvalidate = true
			}
			c.lock.Unlock()
			if needInvalidate {
				c.invalidate(*nextCacheSettings)
			}
		case <-ctx.Done():
			done = true
		}
	}
	log.Info("shutting down settings watch")
	c.settingsMgr.Unsubscribe(updateCh)
	close(updateCh)
}

func (c *liveStateCache) Init() error {
	cacheSettings, err := c.loadCacheSettings()
	if err != nil {
		return err
	}
	c.cacheSettings = *cacheSettings
	return nil
}

// Run watches for resource changes annotated with the application label on all registered clusters and schedules the corresponding app refreshes.
func (c *liveStateCache) Run(ctx context.Context) error {
	go c.watchSettings(ctx)

	kube.RetryUntilSucceed(ctx, clustercache.ClusterRetryTimeout, "watch clusters", logutils.NewLogrusLogger(log.New()), func() error {
		return c.db.WatchClusters(ctx, c.handleAddEvent, c.handleModEvent, c.handleDeleteEvent)
	})

	<-ctx.Done()
	c.invalidate(c.cacheSettings)
	return nil
}

func (c *liveStateCache) canHandleCluster(cluster *appv1.Cluster) bool {
	if c.clusterFilter == nil {
		return true
	}
	return c.clusterFilter(cluster)
}
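
// A sketch of a possible clusterFilter (hypothetical sharding scheme, not part
// of this file): each controller replica handles only the clusters whose
// server URL hashes to its shard.
//
//	func shardFilter(shard, replicas uint32) func(cluster *appv1.Cluster) bool {
//		return func(cluster *appv1.Cluster) bool {
//			h := fnv.New32a() // requires "hash/fnv"
//			_, _ = h.Write([]byte(cluster.Server))
//			return h.Sum32()%replicas == shard
//		}
//	}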

func (c *liveStateCache) handleAddEvent(cluster *appv1.Cluster) {
	if !c.canHandleCluster(cluster) {
		log.Infof("Ignoring cluster %s", cluster.Server)
		return
	}

	c.lock.Lock()
	_, ok := c.clusters[cluster.Server]
	c.lock.Unlock()
	if !ok {
		if c.clusterHasApps(c.appInformer.GetStore().List(), cluster) {
			go func() {
				// warm up the cache for a cluster that has apps
				_, _ = c.getSyncedCluster(cluster.Server)
			}()
		}
	}
}

func (c *liveStateCache) handleModEvent(oldCluster *appv1.Cluster, newCluster *appv1.Cluster) {
	c.lock.Lock()
	cluster, ok := c.clusters[newCluster.Server]
	c.lock.Unlock()
	if ok {
		if !c.canHandleCluster(newCluster) {
			cluster.Invalidate()
			c.lock.Lock()
			delete(c.clusters, newCluster.Server)
			c.lock.Unlock()
			return
		}

		var updateSettings []clustercache.UpdateSettingsFunc
		if !reflect.DeepEqual(oldCluster.Config, newCluster.Config) {
			updateSettings = append(updateSettings, clustercache.SetConfig(newCluster.RESTConfig()))
		}
		if !reflect.DeepEqual(oldCluster.Namespaces, newCluster.Namespaces) {
			updateSettings = append(updateSettings, clustercache.SetNamespaces(newCluster.Namespaces))
		}
		forceInvalidate := false
		if newCluster.RefreshRequestedAt != nil &&
			cluster.GetClusterInfo().LastCacheSyncTime != nil &&
			cluster.GetClusterInfo().LastCacheSyncTime.Before(newCluster.RefreshRequestedAt.Time) {
			forceInvalidate = true
		}

		if len(updateSettings) > 0 || forceInvalidate {
			cluster.Invalidate(updateSettings...)
			go func() {
				// warm up the cluster cache
				_ = cluster.EnsureSynced()
			}()
		}
	}
}

func (c *liveStateCache) handleDeleteEvent(clusterServer string) {
	c.lock.Lock()
	defer c.lock.Unlock()
	cluster, ok := c.clusters[clusterServer]
	if ok {
		cluster.Invalidate()
		delete(c.clusters, clusterServer)
	}
}

func (c *liveStateCache) GetClustersInfo() []clustercache.ClusterInfo {
	clusters := make(map[string]clustercache.ClusterCache)
	c.lock.RLock()
	for k := range c.clusters {
		clusters[k] = c.clusters[k]
	}
	c.lock.RUnlock()

	res := make([]clustercache.ClusterInfo, 0)
	for server, clusterCache := range clusters {
		info := clusterCache.GetClusterInfo()
		info.Server = server
		res = append(res, info)
	}
	return res
}

func (c *liveStateCache) GetClusterCache(server string) (clustercache.ClusterCache, error) {
	return c.getSyncedCluster(server)
}