// github.com/argoproj/argo-cd@v1.8.7/controller/cache/cache.go

package cache

import (
	"context"
	"fmt"
	"reflect"
	"sync"

	clustercache "github.com/argoproj/gitops-engine/pkg/cache"
	"github.com/argoproj/gitops-engine/pkg/health"
	"github.com/argoproj/gitops-engine/pkg/utils/kube"
	log "github.com/sirupsen/logrus"
	"golang.org/x/sync/semaphore"
	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
	"k8s.io/apimachinery/pkg/runtime/schema"
	"k8s.io/apimachinery/pkg/watch"
	"k8s.io/client-go/tools/cache"

	"github.com/argoproj/argo-cd/common"
	"github.com/argoproj/argo-cd/controller/metrics"
	appv1 "github.com/argoproj/argo-cd/pkg/apis/application/v1alpha1"
	"github.com/argoproj/argo-cd/util/argo"
	"github.com/argoproj/argo-cd/util/db"
	logutils "github.com/argoproj/argo-cd/util/log"
	"github.com/argoproj/argo-cd/util/lua"
	"github.com/argoproj/argo-cd/util/settings"
)

type LiveStateCache interface {
	// GetVersionsInfo returns the Kubernetes server version and the available API groups.
	GetVersionsInfo(serverURL string) (string, []metav1.APIGroup, error)
	// IsNamespaced returns true if the given group/kind is a namespaced resource.
	IsNamespaced(server string, gk schema.GroupKind) (bool, error)
	// GetClusterCache returns the synced cluster cache.
	GetClusterCache(server string) (clustercache.ClusterCache, error)
	// IterateHierarchy executes the given callback against the resource specified by the key and all of its children.
	IterateHierarchy(server string, key kube.ResourceKey, action func(child appv1.ResourceNode, appName string)) error
	// GetManagedLiveObjs returns the state of the live nodes that correspond to the target nodes of the specified application.
	GetManagedLiveObjs(a *appv1.Application, targetObjs []*unstructured.Unstructured) (map[kube.ResourceKey]*unstructured.Unstructured, error)
	// GetNamespaceTopLevelResources returns all top-level resources (resources without owner references) of the specified namespace.
	GetNamespaceTopLevelResources(server string, namespace string) (map[kube.ResourceKey]appv1.ResourceNode, error)
	// Run starts watching resources of each controlled cluster.
	Run(ctx context.Context) error
	// GetClustersInfo returns information about the monitored clusters.
	GetClustersInfo() []clustercache.ClusterInfo
	// Init must be executed before the cache can be used.
	Init() error
}

type ObjectUpdatedHandler = func(managedByApp map[string]bool, ref v1.ObjectReference)

type ResourceInfo struct {
	Info    []appv1.InfoItem
	AppName string
	// NetworkingInfo is available only for the known types involved in networking: Ingress, Service, Pod.
	NetworkingInfo *appv1.ResourceNetworkingInfo
	Images         []string
	Health         *health.HealthStatus
}

func NewLiveStateCache(
	db db.ArgoDB,
	appInformer cache.SharedIndexInformer,
	settingsMgr *settings.SettingsManager,
	kubectl kube.Kubectl,
	metricsServer *metrics.MetricsServer,
	onObjectUpdated ObjectUpdatedHandler,
	clusterFilter func(cluster *appv1.Cluster) bool) LiveStateCache {

	return &liveStateCache{
		appInformer:     appInformer,
		db:              db,
		clusters:        make(map[string]clustercache.ClusterCache),
		onObjectUpdated: onObjectUpdated,
		kubectl:         kubectl,
		settingsMgr:     settingsMgr,
		metricsServer:   metricsServer,
		// The default limit of 50 was chosen based on experiments.
		listSemaphore: semaphore.NewWeighted(50),
		clusterFilter: clusterFilter,
	}
}
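// Illustrative sketch (hypothetical helper, not used elsewhere in this package): it shows how a
// *semaphore.Weighted such as listSemaphore above bounds concurrency. Each memory-heavy list
// operation must acquire one of the 50 units before running; callers beyond the limit block
// until a unit is released. doList stands in for an arbitrary list query.
func exampleListGate(ctx context.Context, sem *semaphore.Weighted, doList func() error) error {
	if err := sem.Acquire(ctx, 1); err != nil {
		return err // the context was cancelled while waiting for a free unit
	}
	defer sem.Release(1)
	return doList()
}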
type cacheSettings struct {
	clusterSettings     clustercache.Settings
	appInstanceLabelKey string
}

type liveStateCache struct {
	db              db.ArgoDB
	appInformer     cache.SharedIndexInformer
	onObjectUpdated ObjectUpdatedHandler
	kubectl         kube.Kubectl
	settingsMgr     *settings.SettingsManager
	metricsServer   *metrics.MetricsServer
	clusterFilter   func(cluster *appv1.Cluster) bool

	// listSemaphore limits the number of concurrent memory-consuming operations on the results
	// of k8s list queries across all clusters, to avoid memory spikes during cache initialization.
	listSemaphore *semaphore.Weighted

	clusters      map[string]clustercache.ClusterCache
	cacheSettings cacheSettings
	lock          sync.RWMutex
}

func (c *liveStateCache) loadCacheSettings() (*cacheSettings, error) {
	appInstanceLabelKey, err := c.settingsMgr.GetAppInstanceLabelKey()
	if err != nil {
		return nil, err
	}
	resourcesFilter, err := c.settingsMgr.GetResourcesFilter()
	if err != nil {
		return nil, err
	}
	resourceOverrides, err := c.settingsMgr.GetResourceOverrides()
	if err != nil {
		return nil, err
	}
	clusterSettings := clustercache.Settings{
		ResourceHealthOverride: lua.ResourceHealthOverrides(resourceOverrides),
		ResourcesFilter:        resourcesFilter,
	}
	return &cacheSettings{clusterSettings, appInstanceLabelKey}, nil
}

// asResourceNode converts a cached cluster resource into the ResourceNode representation
// exposed in the application resource tree.
func asResourceNode(r *clustercache.Resource) appv1.ResourceNode {
	gv, err := schema.ParseGroupVersion(r.Ref.APIVersion)
	if err != nil {
		gv = schema.GroupVersion{}
	}
	parentRefs := make([]appv1.ResourceRef, len(r.OwnerRefs))
	for i, ownerRef := range r.OwnerRefs {
		ownerGvk := schema.FromAPIVersionAndKind(ownerRef.APIVersion, ownerRef.Kind)
		ownerKey := kube.NewResourceKey(ownerGvk.Group, ownerRef.Kind, r.Ref.Namespace, ownerRef.Name)
		parentRefs[i] = appv1.ResourceRef{Name: ownerRef.Name, Kind: ownerKey.Kind, Namespace: r.Ref.Namespace, Group: ownerKey.Group, UID: string(ownerRef.UID)}
	}
	var resHealth *appv1.HealthStatus
	resourceInfo := resInfo(r)
	if resourceInfo.Health != nil {
		resHealth = &appv1.HealthStatus{Status: resourceInfo.Health.Status, Message: resourceInfo.Health.Message}
	}
	return appv1.ResourceNode{
		ResourceRef: appv1.ResourceRef{
			UID:       string(r.Ref.UID),
			Name:      r.Ref.Name,
			Group:     gv.Group,
			Version:   gv.Version,
			Kind:      r.Ref.Kind,
			Namespace: r.Ref.Namespace,
		},
		ParentRefs:      parentRefs,
		Info:            resourceInfo.Info,
		ResourceVersion: r.ResourceVersion,
		NetworkingInfo:  resourceInfo.NetworkingInfo,
		Images:          resourceInfo.Images,
		Health:          resHealth,
		CreatedAt:       r.CreationTimestamp,
	}
}

func resInfo(r *clustercache.Resource) *ResourceInfo {
	info, ok := r.Info.(*ResourceInfo)
	if !ok || info == nil {
		info = &ResourceInfo{}
	}
	return info
}

func isRootAppNode(r *clustercache.Resource) bool {
	return resInfo(r).AppName != "" && len(r.OwnerRefs) == 0
}

func getApp(r *clustercache.Resource, ns map[kube.ResourceKey]*clustercache.Resource) string {
	return getAppRecursive(r, ns, map[kube.ResourceKey]bool{})
}

func ownerRefGV(ownerRef metav1.OwnerReference) schema.GroupVersion {
	gv, err := schema.ParseGroupVersion(ownerRef.APIVersion)
	if err != nil {
		gv = schema.GroupVersion{}
	}
	return gv
}

// getAppRecursive walks the owner-reference chain upwards until it finds a resource that carries
// an app name; the visited map guards against owner-reference cycles.
func getAppRecursive(r *clustercache.Resource, ns map[kube.ResourceKey]*clustercache.Resource, visited map[kube.ResourceKey]bool) string {
	if visited[r.ResourceKey()] {
		log.Warnf("Circular dependency detected: %v.", visited)
		return resInfo(r).AppName
	}
	visited[r.ResourceKey()] = true

	if resInfo(r).AppName != "" {
		return resInfo(r).AppName
	}
	for _, ownerRef := range r.OwnerRefs {
		gv := ownerRefGV(ownerRef)
		if parent, ok := ns[kube.NewResourceKey(gv.Group, ownerRef.Kind, r.Ref.Namespace, ownerRef.Name)]; ok {
			app := getAppRecursive(parent, ns, visited)
			if app != "" {
				return app
			}
		}
	}
	return ""
}
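// Illustrative sketch (hypothetical resources, not used elsewhere in this package): a Pod that
// carries no app label resolves its application through its ReplicaSet owner, which is a root
// resource holding the app name in its ResourceInfo. All field values are example assumptions;
// the construction relies only on the exported clustercache.Resource fields used above.
func exampleOwnerWalk() string {
	rsKey := kube.NewResourceKey("apps", "ReplicaSet", "default", "guestbook-rs")
	ns := map[kube.ResourceKey]*clustercache.Resource{
		rsKey: {
			Ref:  v1.ObjectReference{APIVersion: "apps/v1", Kind: "ReplicaSet", Namespace: "default", Name: "guestbook-rs"},
			Info: &ResourceInfo{AppName: "guestbook"}, // the root resource is labelled with the app
		},
	}
	pod := &clustercache.Resource{
		Ref:       v1.ObjectReference{APIVersion: "v1", Kind: "Pod", Namespace: "default", Name: "guestbook-rs-abc12"},
		OwnerRefs: []metav1.OwnerReference{{APIVersion: "apps/v1", Kind: "ReplicaSet", Name: "guestbook-rs"}},
		Info:      &ResourceInfo{}, // no app label of its own
	}
	return getApp(pod, ns) // "guestbook", found one level up the owner chain
}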
var (
	ignoredRefreshResources = map[string]bool{
		"/" + kube.EndpointsKind: true,
	}
)

// skipAppRequeuing checks if the object is an API type which we want to skip requeuing against.
// We ignore API types which have a high churn rate and/or whose updates are irrelevant to the app.
func skipAppRequeuing(key kube.ResourceKey) bool {
	return ignoredRefreshResources[key.Group+"/"+key.Kind]
}
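// Illustrative sketch (hypothetical names): Endpoints belong to the empty "core" API group, so
// their lookup key is "/" + "Endpoints", which ignoredRefreshResources marks as skipped. Other
// kinds, such as core-group Pods under "/Pod", still trigger an app refresh.
func exampleSkipRequeuing() (bool, bool) {
	endpoints := kube.NewResourceKey("", kube.EndpointsKind, "default", "my-service")
	pod := kube.NewResourceKey("", "Pod", "default", "my-pod")
	return skipAppRequeuing(endpoints), skipAppRequeuing(pod) // true, false
}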
func (c *liveStateCache) getCluster(server string) (clustercache.ClusterCache, error) {
	c.lock.RLock()
	clusterCache, ok := c.clusters[server]
	cacheSettings := c.cacheSettings
	c.lock.RUnlock()

	if ok {
		return clusterCache, nil
	}

	c.lock.Lock()
	defer c.lock.Unlock()

	clusterCache, ok = c.clusters[server]
	if ok {
		return clusterCache, nil
	}

	cluster, err := c.db.GetCluster(context.Background(), server)
	if err != nil {
		return nil, err
	}

	if !c.canHandleCluster(cluster) {
		return nil, fmt.Errorf("controller is configured to ignore cluster %s", cluster.Server)
	}

	clusterCache = clustercache.NewClusterCache(cluster.RESTConfig(),
		clustercache.SetListSemaphore(c.listSemaphore),
		clustercache.SetResyncTimeout(common.K8SClusterResyncDuration),
		clustercache.SetSettings(cacheSettings.clusterSettings),
		clustercache.SetNamespaces(cluster.Namespaces),
		clustercache.SetPopulateResourceInfoHandler(func(un *unstructured.Unstructured, isRoot bool) (interface{}, bool) {
			res := &ResourceInfo{}
			populateNodeInfo(un, res)
			res.Health, _ = health.GetResourceHealth(un, cacheSettings.clusterSettings.ResourceHealthOverride)
			appName := kube.GetAppInstanceLabel(un, cacheSettings.appInstanceLabelKey)
			if isRoot && appName != "" {
				res.AppName = appName
			}

			// Edge case: we do not label CRDs, so they miss the tracking label we inject. But we
			// still want the full resource to be available in our cache (to diff), so we store all CRDs.
			return res, res.AppName != "" || un.GroupVersionKind().Kind == kube.CustomResourceDefinitionKind
		}),
		clustercache.SetLogr(logutils.NewLogrusLogger(log.WithField("server", cluster.Server))),
	)

	_ = clusterCache.OnResourceUpdated(func(newRes *clustercache.Resource, oldRes *clustercache.Resource, namespaceResources map[kube.ResourceKey]*clustercache.Resource) {
		toNotify := make(map[string]bool)
		var ref v1.ObjectReference
		if newRes != nil {
			ref = newRes.Ref
		} else {
			ref = oldRes.Ref
		}
		for _, r := range []*clustercache.Resource{newRes, oldRes} {
			if r == nil {
				continue
			}
			app := getApp(r, namespaceResources)
			if app == "" || skipAppRequeuing(r.ResourceKey()) {
				continue
			}
			toNotify[app] = isRootAppNode(r) || toNotify[app]
		}
		c.onObjectUpdated(toNotify, ref)
	})

	_ = clusterCache.OnEvent(func(event watch.EventType, un *unstructured.Unstructured) {
		gvk := un.GroupVersionKind()
		c.metricsServer.IncClusterEventsCount(cluster.Server, gvk.Group, gvk.Kind)
	})

	c.clusters[server] = clusterCache

	return clusterCache, nil
}
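// Minimal sketch of the locking discipline getCluster follows (hypothetical helper, not used
// elsewhere in this package): a cheap read-locked lookup first, then a re-check under the write
// lock before building, so two concurrent callers never construct separate caches for one server.
func (c *liveStateCache) exampleLookupOrCreate(server string, build func() clustercache.ClusterCache) clustercache.ClusterCache {
	c.lock.RLock()
	cached, ok := c.clusters[server]
	c.lock.RUnlock()
	if ok {
		return cached // fast path: already cached
	}
	c.lock.Lock()
	defer c.lock.Unlock()
	if cached, ok = c.clusters[server]; ok {
		return cached // another goroutine won the race between the two locks
	}
	cached = build()
	c.clusters[server] = cached
	return cached
}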
func (c *liveStateCache) getSyncedCluster(server string) (clustercache.ClusterCache, error) {
	clusterCache, err := c.getCluster(server)
	if err != nil {
		return nil, err
	}
	err = clusterCache.EnsureSynced()
	if err != nil {
		return nil, err
	}
	return clusterCache, nil
}

func (c *liveStateCache) invalidate(cacheSettings cacheSettings) {
	log.Info("invalidating live state cache")
	c.lock.Lock()
	defer c.lock.Unlock()

	c.cacheSettings = cacheSettings
	for _, clust := range c.clusters {
		clust.Invalidate(clustercache.SetSettings(cacheSettings.clusterSettings))
	}
	log.Info("live state cache invalidated")
}

func (c *liveStateCache) IsNamespaced(server string, gk schema.GroupKind) (bool, error) {
	clusterInfo, err := c.getSyncedCluster(server)
	if err != nil {
		return false, err
	}
	return clusterInfo.IsNamespaced(gk)
}

func (c *liveStateCache) IterateHierarchy(server string, key kube.ResourceKey, action func(child appv1.ResourceNode, appName string)) error {
	clusterInfo, err := c.getSyncedCluster(server)
	if err != nil {
		return err
	}
	clusterInfo.IterateHierarchy(key, func(resource *clustercache.Resource, namespaceResources map[kube.ResourceKey]*clustercache.Resource) {
		action(asResourceNode(resource), getApp(resource, namespaceResources))
	})
	return nil
}

func (c *liveStateCache) GetNamespaceTopLevelResources(server string, namespace string) (map[kube.ResourceKey]appv1.ResourceNode, error) {
	clusterInfo, err := c.getSyncedCluster(server)
	if err != nil {
		return nil, err
	}
	resources := clusterInfo.GetNamespaceTopLevelResources(namespace)
	res := make(map[kube.ResourceKey]appv1.ResourceNode)
	for k, r := range resources {
		res[k] = asResourceNode(r)
	}
	return res, nil
}

func (c *liveStateCache) GetManagedLiveObjs(a *appv1.Application, targetObjs []*unstructured.Unstructured) (map[kube.ResourceKey]*unstructured.Unstructured, error) {
	clusterInfo, err := c.getSyncedCluster(a.Spec.Destination.Server)
	if err != nil {
		return nil, err
	}
	return clusterInfo.GetManagedLiveObjs(targetObjs, func(r *clustercache.Resource) bool {
		return resInfo(r).AppName == a.Name
	})
}

func (c *liveStateCache) GetVersionsInfo(serverURL string) (string, []metav1.APIGroup, error) {
	clusterInfo, err := c.getSyncedCluster(serverURL)
	if err != nil {
		return "", nil, err
	}
	return clusterInfo.GetServerVersion(), clusterInfo.GetAPIGroups(), nil
}

// isClusterHasApps reports whether any application in the informer store targets the given cluster.
func (c *liveStateCache) isClusterHasApps(apps []interface{}, cluster *appv1.Cluster) bool {
	for _, obj := range apps {
		app, ok := obj.(*appv1.Application)
		if !ok {
			continue
		}
		err := argo.ValidateDestination(context.Background(), &app.Spec.Destination, c.db)
		if err != nil {
			continue
		}
		if app.Spec.Destination.Server == cluster.Server {
			return true
		}
	}
	return false
}

func (c *liveStateCache) watchSettings(ctx context.Context) {
	updateCh := make(chan *settings.ArgoCDSettings, 1)
	c.settingsMgr.Subscribe(updateCh)

	done := false
	for !done {
		select {
		case <-updateCh:
			nextCacheSettings, err := c.loadCacheSettings()
			if err != nil {
				log.Warnf("Failed to read updated settings: %v", err)
				continue
			}

			c.lock.Lock()
			needInvalidate := false
			if !reflect.DeepEqual(c.cacheSettings, *nextCacheSettings) {
				c.cacheSettings = *nextCacheSettings
				needInvalidate = true
			}
			c.lock.Unlock()
			if needInvalidate {
				c.invalidate(*nextCacheSettings)
			}
		case <-ctx.Done():
			done = true
		}
	}
	log.Info("shutting down settings watch")
	c.settingsMgr.Unsubscribe(updateCh)
	close(updateCh)
}

func (c *liveStateCache) Init() error {
	cacheSettings, err := c.loadCacheSettings()
	if err != nil {
		return err
	}
	c.cacheSettings = *cacheSettings
	return nil
}
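// Illustrative usage sketch (hypothetical caller): fetching the live counterparts of an
// application's target manifests via GetManagedLiveObjs above. That entries may be nil for
// targets with no live counterpart yet is this example's assumption, not a guarantee of this file.
func exampleManagedLiveObjs(lsc LiveStateCache, app *appv1.Application, targets []*unstructured.Unstructured) error {
	liveObjs, err := lsc.GetManagedLiveObjs(app, targets)
	if err != nil {
		return err
	}
	for key, live := range liveObjs {
		if live == nil {
			log.Infof("%v: no live object yet", key)
			continue
		}
		log.Infof("%v: live resourceVersion=%s", key, live.GetResourceVersion())
	}
	return nil
}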
// Run watches for resource changes annotated with the application label on all registered
// clusters and schedules the corresponding app refreshes.
func (c *liveStateCache) Run(ctx context.Context) error {
	go c.watchSettings(ctx)

	kube.RetryUntilSucceed(ctx, clustercache.ClusterRetryTimeout, "watch clusters", logutils.NewLogrusLogger(log.New()), func() error {
		return c.db.WatchClusters(ctx, c.handleAddEvent, c.handleModEvent, c.handleDeleteEvent)
	})

	<-ctx.Done()
	c.invalidate(c.cacheSettings)
	return nil
}

func (c *liveStateCache) canHandleCluster(cluster *appv1.Cluster) bool {
	if c.clusterFilter == nil {
		return true
	}
	return c.clusterFilter(cluster)
}

func (c *liveStateCache) handleAddEvent(cluster *appv1.Cluster) {
	if !c.canHandleCluster(cluster) {
		log.Infof("Ignoring cluster %s", cluster.Server)
		return
	}

	c.lock.Lock()
	_, ok := c.clusters[cluster.Server]
	c.lock.Unlock()
	if !ok {
		if c.isClusterHasApps(c.appInformer.GetStore().List(), cluster) {
			go func() {
				// warm up the cache for a cluster that has apps
				_, _ = c.getSyncedCluster(cluster.Server)
			}()
		}
	}
}

func (c *liveStateCache) handleModEvent(oldCluster *appv1.Cluster, newCluster *appv1.Cluster) {
	c.lock.Lock()
	cluster, ok := c.clusters[newCluster.Server]
	c.lock.Unlock()
	if ok {
		if !c.canHandleCluster(newCluster) {
			cluster.Invalidate()
			c.lock.Lock()
			delete(c.clusters, newCluster.Server)
			c.lock.Unlock()
			return
		}

		var updateSettings []clustercache.UpdateSettingsFunc
		if !reflect.DeepEqual(oldCluster.Config, newCluster.Config) {
			updateSettings = append(updateSettings, clustercache.SetConfig(newCluster.RESTConfig()))
		}
		if !reflect.DeepEqual(oldCluster.Namespaces, newCluster.Namespaces) {
			updateSettings = append(updateSettings, clustercache.SetNamespaces(newCluster.Namespaces))
		}
		forceInvalidate := false
		if newCluster.RefreshRequestedAt != nil &&
			cluster.GetClusterInfo().LastCacheSyncTime != nil &&
			cluster.GetClusterInfo().LastCacheSyncTime.Before(newCluster.RefreshRequestedAt.Time) {
			forceInvalidate = true
		}

		if len(updateSettings) > 0 || forceInvalidate {
			cluster.Invalidate(updateSettings...)
			go func() {
				// warm up the cluster cache
				_ = cluster.EnsureSynced()
			}()
		}
	}
}

func (c *liveStateCache) handleDeleteEvent(clusterServer string) {
	c.lock.Lock()
	defer c.lock.Unlock()
	cluster, ok := c.clusters[clusterServer]
	if ok {
		cluster.Invalidate()
		delete(c.clusters, clusterServer)
	}
}

func (c *liveStateCache) GetClustersInfo() []clustercache.ClusterInfo {
	clusters := make(map[string]clustercache.ClusterCache)
	c.lock.RLock()
	for k := range c.clusters {
		clusters[k] = c.clusters[k]
	}
	c.lock.RUnlock()

	res := make([]clustercache.ClusterInfo, 0)
	for server, clusterCache := range clusters {
		info := clusterCache.GetClusterInfo()
		info.Server = server
		res = append(res, info)
	}
	return res
}

func (c *liveStateCache) GetClusterCache(server string) (clustercache.ClusterCache, error) {
	return c.getSyncedCluster(server)
}
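// Illustrative wiring sketch (hypothetical caller; all dependencies are assumed to be constructed
// elsewhere): Init must run before the cache is used, and Run blocks until ctx is cancelled.
func exampleRun(ctx context.Context, argoDB db.ArgoDB, appInformer cache.SharedIndexInformer,
	settingsMgr *settings.SettingsManager, kubectl kube.Kubectl, metricsServer *metrics.MetricsServer) error {
	lsc := NewLiveStateCache(argoDB, appInformer, settingsMgr, kubectl, metricsServer,
		func(managedByApp map[string]bool, ref v1.ObjectReference) {
			// a real controller would requeue the affected applications; here we only log them
			log.Infof("%d app(s) affected by update of %s/%s", len(managedByApp), ref.Namespace, ref.Name)
		},
		nil, // nil cluster filter: handle every cluster
	)
	if err := lsc.Init(); err != nil {
		return err
	}
	return lsc.Run(ctx)
}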