github.com/argoproj/argo-cd/v3@v3.2.1/controller/appcontroller.go (about) 1 package controller 2 3 import ( 4 "context" 5 "encoding/json" 6 stderrors "errors" 7 "fmt" 8 "math" 9 "math/rand" 10 "net/http" 11 "reflect" 12 "runtime/debug" 13 "sort" 14 "strconv" 15 "strings" 16 "sync" 17 "time" 18 19 clustercache "github.com/argoproj/gitops-engine/pkg/cache" 20 "github.com/argoproj/gitops-engine/pkg/diff" 21 "github.com/argoproj/gitops-engine/pkg/health" 22 synccommon "github.com/argoproj/gitops-engine/pkg/sync/common" 23 resourceutil "github.com/argoproj/gitops-engine/pkg/sync/resource" 24 "github.com/argoproj/gitops-engine/pkg/utils/kube" 25 jsonpatch "github.com/evanphx/json-patch" 26 log "github.com/sirupsen/logrus" 27 "golang.org/x/sync/semaphore" 28 corev1 "k8s.io/api/core/v1" 29 apierrors "k8s.io/apimachinery/pkg/api/errors" 30 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 31 "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" 32 "k8s.io/apimachinery/pkg/labels" 33 apiruntime "k8s.io/apimachinery/pkg/runtime" 34 "k8s.io/apimachinery/pkg/runtime/schema" 35 "k8s.io/apimachinery/pkg/types" 36 "k8s.io/apimachinery/pkg/util/runtime" 37 "k8s.io/apimachinery/pkg/util/wait" 38 "k8s.io/apimachinery/pkg/watch" 39 "k8s.io/client-go/informers" 40 informerv1 "k8s.io/client-go/informers/apps/v1" 41 "k8s.io/client-go/kubernetes" 42 "k8s.io/client-go/tools/cache" 43 "k8s.io/client-go/util/workqueue" 44 "k8s.io/utils/ptr" 45 46 commitclient "github.com/argoproj/argo-cd/v3/commitserver/apiclient" 47 "github.com/argoproj/argo-cd/v3/common" 48 statecache "github.com/argoproj/argo-cd/v3/controller/cache" 49 "github.com/argoproj/argo-cd/v3/controller/hydrator" 50 hydratortypes "github.com/argoproj/argo-cd/v3/controller/hydrator/types" 51 "github.com/argoproj/argo-cd/v3/controller/metrics" 52 "github.com/argoproj/argo-cd/v3/controller/sharding" 53 "github.com/argoproj/argo-cd/v3/pkg/apis/application" 54 appv1 "github.com/argoproj/argo-cd/v3/pkg/apis/application/v1alpha1" 55 
appclientset "github.com/argoproj/argo-cd/v3/pkg/client/clientset/versioned" 56 "github.com/argoproj/argo-cd/v3/pkg/client/informers/externalversions/application/v1alpha1" 57 applisters "github.com/argoproj/argo-cd/v3/pkg/client/listers/application/v1alpha1" 58 "github.com/argoproj/argo-cd/v3/reposerver/apiclient" 59 applog "github.com/argoproj/argo-cd/v3/util/app/log" 60 "github.com/argoproj/argo-cd/v3/util/argo" 61 argodiff "github.com/argoproj/argo-cd/v3/util/argo/diff" 62 "github.com/argoproj/argo-cd/v3/util/argo/normalizers" 63 "github.com/argoproj/argo-cd/v3/util/env" 64 "github.com/argoproj/argo-cd/v3/util/stats" 65 66 "github.com/argoproj/argo-cd/v3/pkg/ratelimiter" 67 appstatecache "github.com/argoproj/argo-cd/v3/util/cache/appstate" 68 "github.com/argoproj/argo-cd/v3/util/db" 69 "github.com/argoproj/argo-cd/v3/util/errors" 70 "github.com/argoproj/argo-cd/v3/util/glob" 71 "github.com/argoproj/argo-cd/v3/util/helm" 72 logutils "github.com/argoproj/argo-cd/v3/util/log" 73 settings_util "github.com/argoproj/argo-cd/v3/util/settings" 74 ) 75 76 const ( 77 updateOperationStateTimeout = 1 * time.Second 78 defaultDeploymentInformerResyncDuration = 10 * time.Second 79 // orphanedIndex contains application which monitor orphaned resources by namespace 80 orphanedIndex = "orphaned" 81 ) 82 83 type CompareWith int 84 85 const ( 86 // Compare live application state against state defined in latest git revision with no resolved revision caching. 87 CompareWithLatestForceResolve CompareWith = 3 88 // Compare live application state against state defined in latest git revision. 89 CompareWithLatest CompareWith = 2 90 // Compare live application state against state defined using revision of most recent comparison. 
91 CompareWithRecent CompareWith = 1 92 // Skip comparison and only refresh application resources tree 93 ComparisonWithNothing CompareWith = 0 94 ) 95 96 func (a CompareWith) Max(b CompareWith) CompareWith { 97 return CompareWith(math.Max(float64(a), float64(b))) 98 } 99 100 func (a CompareWith) Pointer() *CompareWith { 101 return &a 102 } 103 104 // ApplicationController is the controller for application resources. 105 type ApplicationController struct { 106 cache *appstatecache.Cache 107 namespace string 108 kubeClientset kubernetes.Interface 109 kubectl kube.Kubectl 110 applicationClientset appclientset.Interface 111 auditLogger *argo.AuditLogger 112 // queue contains app namespace/name 113 appRefreshQueue workqueue.TypedRateLimitingInterface[string] 114 // queue contains app namespace/name/comparisonType and used to request app refresh with the predefined comparison type 115 appComparisonTypeRefreshQueue workqueue.TypedRateLimitingInterface[string] 116 appOperationQueue workqueue.TypedRateLimitingInterface[string] 117 projectRefreshQueue workqueue.TypedRateLimitingInterface[string] 118 appHydrateQueue workqueue.TypedRateLimitingInterface[string] 119 hydrationQueue workqueue.TypedRateLimitingInterface[hydratortypes.HydrationQueueKey] 120 appInformer cache.SharedIndexInformer 121 appLister applisters.ApplicationLister 122 projInformer cache.SharedIndexInformer 123 appStateManager AppStateManager 124 stateCache statecache.LiveStateCache 125 statusRefreshTimeout time.Duration 126 statusHardRefreshTimeout time.Duration 127 statusRefreshJitter time.Duration 128 selfHealTimeout time.Duration 129 selfHealBackoff *wait.Backoff 130 selfHealBackoffCooldown time.Duration 131 syncTimeout time.Duration 132 db db.ArgoDB 133 settingsMgr *settings_util.SettingsManager 134 refreshRequestedApps map[string]CompareWith 135 refreshRequestedAppsMutex *sync.Mutex 136 metricsServer *metrics.MetricsServer 137 metricsClusterLabels []string 138 kubectlSemaphore *semaphore.Weighted 139 
clusterSharding sharding.ClusterShardingCache 140 projByNameCache sync.Map 141 applicationNamespaces []string 142 ignoreNormalizerOpts normalizers.IgnoreNormalizerOpts 143 144 // dynamicClusterDistributionEnabled if disabled deploymentInformer is never initialized 145 dynamicClusterDistributionEnabled bool 146 deploymentInformer informerv1.DeploymentInformer 147 148 hydrator *hydrator.Hydrator 149 } 150 151 // NewApplicationController creates new instance of ApplicationController. 152 func NewApplicationController( 153 namespace string, 154 settingsMgr *settings_util.SettingsManager, 155 kubeClientset kubernetes.Interface, 156 applicationClientset appclientset.Interface, 157 repoClientset apiclient.Clientset, 158 commitClientset commitclient.Clientset, 159 argoCache *appstatecache.Cache, 160 kubectl kube.Kubectl, 161 appResyncPeriod time.Duration, 162 appHardResyncPeriod time.Duration, 163 appResyncJitter time.Duration, 164 selfHealTimeout time.Duration, 165 selfHealBackoff *wait.Backoff, 166 selfHealBackoffCooldown time.Duration, 167 syncTimeout time.Duration, 168 repoErrorGracePeriod time.Duration, 169 metricsPort int, 170 metricsCacheExpiration time.Duration, 171 metricsApplicationLabels []string, 172 metricsApplicationConditions []string, 173 metricsClusterLabels []string, 174 kubectlParallelismLimit int64, 175 persistResourceHealth bool, 176 clusterSharding sharding.ClusterShardingCache, 177 applicationNamespaces []string, 178 rateLimiterConfig *ratelimiter.AppControllerRateLimiterConfig, 179 serverSideDiff bool, 180 dynamicClusterDistributionEnabled bool, 181 ignoreNormalizerOpts normalizers.IgnoreNormalizerOpts, 182 enableK8sEvent []string, 183 hydratorEnabled bool, 184 ) (*ApplicationController, error) { 185 log.Infof("appResyncPeriod=%v, appHardResyncPeriod=%v, appResyncJitter=%v", appResyncPeriod, appHardResyncPeriod, appResyncJitter) 186 db := db.NewDB(namespace, settingsMgr, kubeClientset) 187 if rateLimiterConfig == nil { 188 rateLimiterConfig = 
ratelimiter.GetDefaultAppRateLimiterConfig() 189 log.Info("Using default workqueue rate limiter config") 190 } 191 ctrl := ApplicationController{ 192 cache: argoCache, 193 namespace: namespace, 194 kubeClientset: kubeClientset, 195 kubectl: kubectl, 196 applicationClientset: applicationClientset, 197 appRefreshQueue: workqueue.NewTypedRateLimitingQueueWithConfig(ratelimiter.NewCustomAppControllerRateLimiter[string](rateLimiterConfig), workqueue.TypedRateLimitingQueueConfig[string]{Name: "app_reconciliation_queue"}), 198 appOperationQueue: workqueue.NewTypedRateLimitingQueueWithConfig(ratelimiter.NewCustomAppControllerRateLimiter[string](rateLimiterConfig), workqueue.TypedRateLimitingQueueConfig[string]{Name: "app_operation_processing_queue"}), 199 projectRefreshQueue: workqueue.NewTypedRateLimitingQueueWithConfig(ratelimiter.NewCustomAppControllerRateLimiter[string](rateLimiterConfig), workqueue.TypedRateLimitingQueueConfig[string]{Name: "project_reconciliation_queue"}), 200 appComparisonTypeRefreshQueue: workqueue.NewTypedRateLimitingQueue(ratelimiter.NewCustomAppControllerRateLimiter[string](rateLimiterConfig)), 201 appHydrateQueue: workqueue.NewTypedRateLimitingQueueWithConfig(ratelimiter.NewCustomAppControllerRateLimiter[string](rateLimiterConfig), workqueue.TypedRateLimitingQueueConfig[string]{Name: "app_hydration_queue"}), 202 hydrationQueue: workqueue.NewTypedRateLimitingQueueWithConfig(ratelimiter.NewCustomAppControllerRateLimiter[hydratortypes.HydrationQueueKey](rateLimiterConfig), workqueue.TypedRateLimitingQueueConfig[hydratortypes.HydrationQueueKey]{Name: "manifest_hydration_queue"}), 203 db: db, 204 statusRefreshTimeout: appResyncPeriod, 205 statusHardRefreshTimeout: appHardResyncPeriod, 206 statusRefreshJitter: appResyncJitter, 207 refreshRequestedApps: make(map[string]CompareWith), 208 refreshRequestedAppsMutex: &sync.Mutex{}, 209 auditLogger: argo.NewAuditLogger(kubeClientset, common.ApplicationController, enableK8sEvent), 210 settingsMgr: 
settingsMgr, 211 selfHealTimeout: selfHealTimeout, 212 selfHealBackoff: selfHealBackoff, 213 selfHealBackoffCooldown: selfHealBackoffCooldown, 214 syncTimeout: syncTimeout, 215 clusterSharding: clusterSharding, 216 projByNameCache: sync.Map{}, 217 applicationNamespaces: applicationNamespaces, 218 dynamicClusterDistributionEnabled: dynamicClusterDistributionEnabled, 219 ignoreNormalizerOpts: ignoreNormalizerOpts, 220 metricsClusterLabels: metricsClusterLabels, 221 } 222 if hydratorEnabled { 223 ctrl.hydrator = hydrator.NewHydrator(&ctrl, appResyncPeriod, commitClientset, repoClientset, db) 224 } 225 if kubectlParallelismLimit > 0 { 226 ctrl.kubectlSemaphore = semaphore.NewWeighted(kubectlParallelismLimit) 227 } 228 kubectl.SetOnKubectlRun(ctrl.onKubectlRun) 229 appInformer, appLister := ctrl.newApplicationInformerAndLister() 230 indexers := cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc} 231 projInformer := v1alpha1.NewAppProjectInformer(applicationClientset, namespace, appResyncPeriod, indexers) 232 var err error 233 _, err = projInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ 234 AddFunc: func(obj any) { 235 if key, err := cache.MetaNamespaceKeyFunc(obj); err == nil { 236 ctrl.projectRefreshQueue.AddRateLimited(key) 237 if projMeta, ok := obj.(metav1.Object); ok { 238 ctrl.InvalidateProjectsCache(projMeta.GetName()) 239 } 240 } 241 }, 242 UpdateFunc: func(_, new any) { 243 if key, err := cache.MetaNamespaceKeyFunc(new); err == nil { 244 ctrl.projectRefreshQueue.AddRateLimited(key) 245 if projMeta, ok := new.(metav1.Object); ok { 246 ctrl.InvalidateProjectsCache(projMeta.GetName()) 247 } 248 } 249 }, 250 DeleteFunc: func(obj any) { 251 if key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(obj); err == nil { 252 // immediately push to queue for deletes 253 ctrl.projectRefreshQueue.Add(key) 254 if projMeta, ok := obj.(metav1.Object); ok { 255 ctrl.InvalidateProjectsCache(projMeta.GetName()) 256 } 257 } 258 }, 259 }) 260 if err != nil 
{ 261 return nil, err 262 } 263 264 factory := informers.NewSharedInformerFactoryWithOptions(ctrl.kubeClientset, defaultDeploymentInformerResyncDuration, informers.WithNamespace(settingsMgr.GetNamespace())) 265 266 var deploymentInformer informerv1.DeploymentInformer 267 268 // only initialize deployment informer if dynamic distribution is enabled 269 if dynamicClusterDistributionEnabled { 270 deploymentInformer = factory.Apps().V1().Deployments() 271 } 272 273 readinessHealthCheck := func(_ *http.Request) error { 274 if dynamicClusterDistributionEnabled { 275 applicationControllerName := env.StringFromEnv(common.EnvAppControllerName, common.DefaultApplicationControllerName) 276 appControllerDeployment, err := deploymentInformer.Lister().Deployments(settingsMgr.GetNamespace()).Get(applicationControllerName) 277 if err != nil { 278 if !apierrors.IsNotFound(err) { 279 return fmt.Errorf("error retrieving Application Controller Deployment: %w", err) 280 } 281 appControllerDeployment = nil 282 } 283 if appControllerDeployment != nil { 284 if appControllerDeployment.Spec.Replicas != nil && int(*appControllerDeployment.Spec.Replicas) <= 0 { 285 return fmt.Errorf("application controller deployment replicas is not set or is less than 0, replicas: %d", appControllerDeployment.Spec.Replicas) 286 } 287 shard := env.ParseNumFromEnv(common.EnvControllerShard, -1, -math.MaxInt32, math.MaxInt32) 288 shard, err := sharding.GetOrUpdateShardFromConfigMap(kubeClientset.(*kubernetes.Clientset), settingsMgr, int(*appControllerDeployment.Spec.Replicas), shard) 289 if err != nil { 290 return fmt.Errorf("error while updating the heartbeat for to the Shard Mapping ConfigMap: %w", err) 291 } 292 293 // update the shard number in the clusterSharding, and resync all applications if the shard number is updated 294 if ctrl.clusterSharding.UpdateShard(shard) { 295 // update shard number in stateCache 296 ctrl.stateCache.UpdateShard(shard) 297 298 // resync all applications 299 apps, err := 
ctrl.appLister.List(labels.Everything()) 300 if err != nil { 301 return err 302 } 303 for _, app := range apps { 304 if !ctrl.canProcessApp(app) { 305 continue 306 } 307 key, err := cache.MetaNamespaceKeyFunc(app) 308 if err == nil { 309 ctrl.appRefreshQueue.AddRateLimited(key) 310 ctrl.clusterSharding.AddApp(app) 311 } 312 } 313 } 314 } 315 } 316 return nil 317 } 318 319 metricsAddr := fmt.Sprintf("0.0.0.0:%d", metricsPort) 320 321 ctrl.metricsServer, err = metrics.NewMetricsServer(metricsAddr, appLister, ctrl.canProcessApp, readinessHealthCheck, metricsApplicationLabels, metricsApplicationConditions, ctrl.db) 322 if err != nil { 323 return nil, err 324 } 325 if metricsCacheExpiration.Seconds() != 0 { 326 err = ctrl.metricsServer.SetExpiration(metricsCacheExpiration) 327 if err != nil { 328 return nil, err 329 } 330 } 331 stateCache := statecache.NewLiveStateCache(db, appInformer, ctrl.settingsMgr, ctrl.metricsServer, ctrl.handleObjectUpdated, clusterSharding, argo.NewResourceTracking()) 332 appStateManager := NewAppStateManager(db, applicationClientset, repoClientset, namespace, kubectl, ctrl.onKubectlRun, ctrl.settingsMgr, stateCache, ctrl.metricsServer, argoCache, ctrl.statusRefreshTimeout, argo.NewResourceTracking(), persistResourceHealth, repoErrorGracePeriod, serverSideDiff, ignoreNormalizerOpts) 333 ctrl.appInformer = appInformer 334 ctrl.appLister = appLister 335 ctrl.projInformer = projInformer 336 ctrl.deploymentInformer = deploymentInformer 337 ctrl.appStateManager = appStateManager 338 ctrl.stateCache = stateCache 339 340 return &ctrl, nil 341 } 342 343 func (ctrl *ApplicationController) InvalidateProjectsCache(names ...string) { 344 if len(names) > 0 { 345 for _, name := range names { 346 ctrl.projByNameCache.Delete(name) 347 } 348 } else if ctrl != nil { 349 ctrl.projByNameCache.Range(func(key, _ any) bool { 350 ctrl.projByNameCache.Delete(key) 351 return true 352 }) 353 } 354 } 355 356 func (ctrl *ApplicationController) GetMetricsServer() 
*metrics.MetricsServer { 357 return ctrl.metricsServer 358 } 359 360 func (ctrl *ApplicationController) onKubectlRun(command string) (kube.CleanupFunc, error) { 361 ctrl.metricsServer.IncKubectlExec(command) 362 if ctrl.kubectlSemaphore != nil { 363 if err := ctrl.kubectlSemaphore.Acquire(context.Background(), 1); err != nil { 364 return nil, err 365 } 366 ctrl.metricsServer.IncKubectlExecPending(command) 367 } 368 return func() { 369 if ctrl.kubectlSemaphore != nil { 370 ctrl.kubectlSemaphore.Release(1) 371 ctrl.metricsServer.DecKubectlExecPending(command) 372 } 373 }, nil 374 } 375 376 func isSelfReferencedApp(app *appv1.Application, ref corev1.ObjectReference) bool { 377 gvk := ref.GroupVersionKind() 378 return ref.UID == app.UID && 379 ref.Name == app.Name && 380 ref.Namespace == app.Namespace && 381 gvk.Group == application.Group && 382 gvk.Kind == application.ApplicationKind 383 } 384 385 func (ctrl *ApplicationController) newAppProjCache(name string) *appProjCache { 386 return &appProjCache{name: name, ctrl: ctrl} 387 } 388 389 type appProjCache struct { 390 name string 391 ctrl *ApplicationController 392 393 lock sync.Mutex 394 appProj *appv1.AppProject 395 } 396 397 // GetAppProject gets an AppProject from the cache. If the AppProject is not 398 // yet cached, retrieves the AppProject from the K8s control plane and stores 399 // in the cache. 
400 func (projCache *appProjCache) GetAppProject(ctx context.Context) (*appv1.AppProject, error) { 401 projCache.lock.Lock() 402 defer projCache.lock.Unlock() 403 if projCache.appProj != nil { 404 return projCache.appProj, nil 405 } 406 proj, err := argo.GetAppProjectByName(ctx, projCache.name, applisters.NewAppProjectLister(projCache.ctrl.projInformer.GetIndexer()), projCache.ctrl.namespace, projCache.ctrl.settingsMgr, projCache.ctrl.db) 407 if err != nil { 408 return nil, err 409 } 410 projCache.appProj = proj 411 return projCache.appProj, nil 412 } 413 414 // getAppProj gets the AppProject for the given Application app. 415 func (ctrl *ApplicationController) getAppProj(app *appv1.Application) (*appv1.AppProject, error) { 416 projCache, _ := ctrl.projByNameCache.Load(app.Spec.GetProject()) 417 if projCache == nil { 418 projCache = ctrl.newAppProjCache(app.Spec.GetProject()) 419 ctrl.projByNameCache.Store(app.Spec.GetProject(), projCache) 420 } 421 proj, err := projCache.(*appProjCache).GetAppProject(context.TODO()) 422 if err != nil { 423 if apierrors.IsNotFound(err) { 424 return nil, err 425 } 426 return nil, fmt.Errorf("could not retrieve AppProject '%s' from cache: %w", app.Spec.Project, err) 427 } 428 if !proj.IsAppNamespacePermitted(app, ctrl.namespace) { 429 return nil, argo.ErrProjectNotPermitted(app.GetName(), app.GetNamespace(), proj.GetName()) 430 } 431 return proj, nil 432 } 433 434 func (ctrl *ApplicationController) handleObjectUpdated(managedByApp map[string]bool, ref corev1.ObjectReference) { 435 // if namespaced resource is not managed by any app it might be orphaned resource of some other apps 436 if len(managedByApp) == 0 && ref.Namespace != "" { 437 // retrieve applications which monitor orphaned resources in the same namespace and refresh them unless resource is denied in app project 438 if objs, err := ctrl.appInformer.GetIndexer().ByIndex(orphanedIndex, ref.Namespace); err == nil { 439 for i := range objs { 440 app, ok := 
objs[i].(*appv1.Application) 441 if !ok { 442 continue 443 } 444 445 managedByApp[app.InstanceName(ctrl.namespace)] = true 446 } 447 } 448 } 449 for appName, isManagedResource := range managedByApp { 450 // The appName is given as <namespace>_<name>, but the indexer needs it 451 // format <namespace>/<name> 452 appKey := ctrl.toAppKey(appName) 453 obj, exists, err := ctrl.appInformer.GetIndexer().GetByKey(appKey) 454 app, ok := obj.(*appv1.Application) 455 if exists && err == nil && ok && isSelfReferencedApp(app, ref) { 456 // Don't force refresh app if related resource is application itself. This prevents infinite reconciliation loop. 457 continue 458 } 459 460 if !ctrl.canProcessApp(obj) { 461 // Don't force refresh app if app belongs to a different controller shard or is outside the allowed namespaces. 462 continue 463 } 464 465 logCtx := log.WithFields(applog.GetAppLogFields(app)) 466 // Enforce application's permission for the source namespace 467 _, err = ctrl.getAppProj(app) 468 if err != nil { 469 logCtx.Errorf("Unable to determine project for app '%s': %v", app.QualifiedName(), err) 470 continue 471 } 472 473 level := ComparisonWithNothing 474 if isManagedResource { 475 level = CompareWithRecent 476 } 477 478 namespace := ref.Namespace 479 if ref.Namespace == "" { 480 namespace = "(cluster-scoped)" 481 } 482 logCtx.WithFields(log.Fields{ 483 "comparison-level": level, 484 "namespace": namespace, 485 "name": ref.Name, 486 "api-version": ref.APIVersion, 487 "kind": ref.Kind, 488 "server": app.Spec.Destination.Server, 489 "cluster-name": app.Spec.Destination.Name, 490 }).Debug("Requesting app refresh caused by object update") 491 492 ctrl.requestAppRefresh(app.QualifiedName(), &level, nil) 493 } 494 } 495 496 // setAppManagedResources will build a list of ResourceDiff based on the provided comparisonResult 497 // and persist app resources related data in the cache. Will return the persisted ApplicationTree. 
498 func (ctrl *ApplicationController) setAppManagedResources(destCluster *appv1.Cluster, a *appv1.Application, comparisonResult *comparisonResult) (*appv1.ApplicationTree, error) { 499 ts := stats.NewTimingStats() 500 defer func() { 501 logCtx := log.WithFields(applog.GetAppLogFields(a)) 502 for k, v := range ts.Timings() { 503 logCtx = logCtx.WithField(k, v.Milliseconds()) 504 } 505 logCtx = logCtx.WithField("time_ms", time.Since(ts.StartTime).Milliseconds()) 506 logCtx.Debug("Finished setting app managed resources") 507 }() 508 managedResources, err := ctrl.hideSecretData(destCluster, a, comparisonResult) 509 ts.AddCheckpoint("hide_secret_data_ms") 510 if err != nil { 511 return nil, fmt.Errorf("error getting managed resources: %w", err) 512 } 513 tree, err := ctrl.getResourceTree(destCluster, a, managedResources) 514 ts.AddCheckpoint("get_resource_tree_ms") 515 if err != nil { 516 return nil, fmt.Errorf("error getting resource tree: %w", err) 517 } 518 err = ctrl.cache.SetAppResourcesTree(a.InstanceName(ctrl.namespace), tree) 519 ts.AddCheckpoint("set_app_resources_tree_ms") 520 if err != nil { 521 return nil, fmt.Errorf("error setting app resource tree: %w", err) 522 } 523 err = ctrl.cache.SetAppManagedResources(a.InstanceName(ctrl.namespace), managedResources) 524 ts.AddCheckpoint("set_app_managed_resources_ms") 525 if err != nil { 526 return nil, fmt.Errorf("error setting app managed resources: %w", err) 527 } 528 return tree, nil 529 } 530 531 // returns true of given resources exist in the namespace by default and not managed by the user 532 func isKnownOrphanedResourceExclusion(key kube.ResourceKey, proj *appv1.AppProject) bool { 533 if key.Namespace == "default" && key.Group == "" && key.Kind == kube.ServiceKind && key.Name == "kubernetes" { 534 return true 535 } 536 if key.Group == "" && key.Kind == kube.ServiceAccountKind && key.Name == "default" { 537 return true 538 } 539 if key.Group == "" && key.Kind == "ConfigMap" && key.Name == "kube-root-ca.crt" 
{ 540 return true 541 } 542 list := proj.Spec.OrphanedResources.Ignore 543 for _, item := range list { 544 if item.Kind == "" || glob.Match(item.Kind, key.Kind) { 545 if glob.Match(item.Group, key.Group) { 546 if item.Name == "" || glob.Match(item.Name, key.Name) { 547 return true 548 } 549 } 550 } 551 } 552 return false 553 } 554 555 func (ctrl *ApplicationController) getResourceTree(destCluster *appv1.Cluster, a *appv1.Application, managedResources []*appv1.ResourceDiff) (*appv1.ApplicationTree, error) { 556 ts := stats.NewTimingStats() 557 defer func() { 558 logCtx := log.WithFields(applog.GetAppLogFields(a)) 559 for k, v := range ts.Timings() { 560 logCtx = logCtx.WithField(k, v.Milliseconds()) 561 } 562 logCtx = logCtx.WithField("time_ms", time.Since(ts.StartTime).Milliseconds()) 563 logCtx.Debug("Finished getting resource tree") 564 }() 565 nodes := make([]appv1.ResourceNode, 0) 566 proj, err := ctrl.getAppProj(a) 567 ts.AddCheckpoint("get_app_proj_ms") 568 if err != nil { 569 return nil, fmt.Errorf("failed to get project: %w", err) 570 } 571 572 orphanedNodesMap := make(map[kube.ResourceKey]appv1.ResourceNode) 573 warnOrphaned := true 574 if proj.Spec.OrphanedResources != nil { 575 orphanedNodesMap, err = ctrl.stateCache.GetNamespaceTopLevelResources(destCluster, a.Spec.Destination.Namespace) 576 if err != nil { 577 return nil, fmt.Errorf("failed to get namespace top-level resources: %w", err) 578 } 579 warnOrphaned = proj.Spec.OrphanedResources.IsWarn() 580 } 581 ts.AddCheckpoint("get_orphaned_resources_ms") 582 managedResourcesKeys := make([]kube.ResourceKey, 0) 583 for i := range managedResources { 584 managedResource := managedResources[i] 585 delete(orphanedNodesMap, kube.NewResourceKey(managedResource.Group, managedResource.Kind, managedResource.Namespace, managedResource.Name)) 586 live := &unstructured.Unstructured{} 587 err := json.Unmarshal([]byte(managedResource.LiveState), &live) 588 if err != nil { 589 return nil, fmt.Errorf("failed to unmarshal 
live state of managed resources: %w", err)
		}

		if live == nil {
			target := &unstructured.Unstructured{}
			err = json.Unmarshal([]byte(managedResource.TargetState), &target)
			if err != nil {
				return nil, fmt.Errorf("failed to unmarshal target state of managed resources: %w", err)
			}
			// No live object exists: surface the resource as a "missing" node,
			// using the identity recorded in the desired (target) state.
			nodes = append(nodes, appv1.ResourceNode{
				ResourceRef: appv1.ResourceRef{
					Version:   target.GroupVersionKind().Version,
					Name:      managedResource.Name,
					Kind:      managedResource.Kind,
					Group:     managedResource.Group,
					Namespace: managedResource.Namespace,
				},
				Health: &appv1.HealthStatus{
					Status: health.HealthStatusMissing,
				},
			})
		} else {
			managedResourcesKeys = append(managedResourcesKeys, kube.GetResourceKey(live))
		}
	}
	// Walk the ownership hierarchy of the live managed resources, keeping only
	// children that the project's resource rules permit.
	err = ctrl.stateCache.IterateHierarchyV2(destCluster, managedResourcesKeys, func(child appv1.ResourceNode, _ string) bool {
		permitted, _ := proj.IsResourcePermitted(schema.GroupKind{Group: child.Group, Kind: child.Kind}, child.Namespace, destCluster, func(project string) ([]*appv1.Cluster, error) {
			clusters, err := ctrl.db.GetProjectClusters(context.TODO(), project)
			if err != nil {
				return nil, fmt.Errorf("failed to get project clusters: %w", err)
			}
			return clusters, nil
		})
		if !permitted {
			// Returning false stops descending into this subtree.
			return false
		}
		nodes = append(nodes, child)
		return true
	})
	if err != nil {
		return nil, fmt.Errorf("failed to iterate resource hierarchy v2: %w", err)
	}
	ts.AddCheckpoint("process_managed_resources_ms")
	orphanedNodes := make([]appv1.ResourceNode, 0)
	orphanedNodesKeys := make([]kube.ResourceKey, 0)
	for k := range orphanedNodesMap {
		// Only namespaced, project-permitted kinds that are not on the known
		// orphan-exclusion list are considered orphan candidates.
		if k.Namespace != "" && proj.IsGroupKindPermitted(k.GroupKind(), true) && !isKnownOrphanedResourceExclusion(k, proj) {
			orphanedNodesKeys = append(orphanedNodesKeys, k)
		}
	}
	err = ctrl.stateCache.IterateHierarchyV2(destCluster, orphanedNodesKeys, func(child appv1.ResourceNode, appName string) bool {
		// A resource tracked by another application known to this controller's
		// informer is not an orphan of this app.
		belongToAnotherApp := false
		if appName != "" {
			appKey := ctrl.toAppKey(appName)
			if _, exists, err := ctrl.appInformer.GetIndexer().GetByKey(appKey); exists && err == nil {
				belongToAnotherApp = true
			}
		}

		if belongToAnotherApp {
			return false
		}

		permitted, _ := proj.IsResourcePermitted(schema.GroupKind{Group: child.Group, Kind: child.Kind}, child.Namespace, destCluster, func(project string) ([]*appv1.Cluster, error) {
			return ctrl.db.GetProjectClusters(context.TODO(), project)
		})

		if !permitted {
			return false
		}
		orphanedNodes = append(orphanedNodes, child)
		return true
	})
	if err != nil {
		return nil, err
	}

	var conditions []appv1.ApplicationCondition
	if len(orphanedNodes) > 0 && warnOrphaned {
		conditions = []appv1.ApplicationCondition{{
			Type:    appv1.ApplicationConditionOrphanedResourceWarning,
			Message: fmt.Sprintf("Application has %d orphaned resources", len(orphanedNodes)),
		}}
	}
	ctrl.metricsServer.SetOrphanedResourcesMetric(a, len(orphanedNodes))
	a.Status.SetConditions(conditions, map[appv1.ApplicationConditionType]bool{appv1.ApplicationConditionOrphanedResourceWarning: true})
	// Sort for a deterministic tree; ResourceRef.String() gives a stable key.
	sort.Slice(orphanedNodes, func(i, j int) bool {
		return orphanedNodes[i].ResourceRef.String() < orphanedNodes[j].ResourceRef.String()
	})
	ts.AddCheckpoint("process_orphaned_resources_ms")

	hosts, err := ctrl.getAppHosts(destCluster, a, nodes)
	if err != nil {
		return nil, fmt.Errorf("failed to get app hosts: %w", err)
	}
	ts.AddCheckpoint("get_app_hosts_ms")
	return &appv1.ApplicationTree{Nodes: nodes, OrphanedNodes: orphanedNodes, Hosts: hosts}, nil
}

// getAppHosts collects, for every cluster node that runs one of the
// application's pods, the node's capacity plus the CPU/memory/storage requested
// by the app's own pods and by neighboring pods on the same node.
func (ctrl *ApplicationController) getAppHosts(destCluster *appv1.Cluster, a *appv1.Application, appNodes []appv1.ResourceNode) ([]appv1.HostInfo, error) {
	ts := stats.NewTimingStats()
	defer func() {
		logCtx := log.WithFields(applog.GetAppLogFields(a))
		for k, v := range ts.Timings() {
			logCtx = logCtx.WithField(k, v.Milliseconds())
		}
		logCtx = logCtx.WithField("time_ms", time.Since(ts.StartTime).Milliseconds())
		logCtx.Debug("Finished getting app hosts")
	}()
	// Only these resource names are aggregated into host info.
	supportedResourceNames := map[corev1.ResourceName]bool{
		corev1.ResourceCPU:     true,
		corev1.ResourceStorage: true,
		corev1.ResourceMemory:  true,
	}
	// Set of pod resource keys that belong to this application.
	appPods := map[kube.ResourceKey]bool{}
	for _, node := range appNodes {
		if node.Group == "" && node.Kind == kube.PodKind {
			appPods[kube.NewResourceKey(node.Group, node.Kind, node.Namespace, node.Name)] = true
		}
	}

	// Single pass over the cluster cache: index nodes by name and split pods
	// into "belongs to this app" vs. "neighbor" buckets per node.
	allNodesInfo := map[string]statecache.NodeInfo{}
	allPodsByNode := map[string][]statecache.PodInfo{}
	appPodsByNode := map[string][]statecache.PodInfo{}
	err := ctrl.stateCache.IterateResources(destCluster, func(res *clustercache.Resource, info *statecache.ResourceInfo) {
		key := res.ResourceKey()

		switch {
		case info.NodeInfo != nil && key.Group == "" && key.Kind == "Node":
			allNodesInfo[key.Name] = *info.NodeInfo
		case info.PodInfo != nil && key.Group == "" && key.Kind == kube.PodKind:
			if appPods[key] {
				appPodsByNode[info.PodInfo.NodeName] = append(appPodsByNode[info.PodInfo.NodeName], *info.PodInfo)
			} else {
				allPodsByNode[info.PodInfo.NodeName] = append(allPodsByNode[info.PodInfo.NodeName], *info.PodInfo)
			}
		}
	})
	ts.AddCheckpoint("iterate_resources_ms")
	if err != nil {
		return nil, err
	}

	var hosts []appv1.HostInfo
	for nodeName, appPods := range appPodsByNode {
		node, ok := allNodesInfo[nodeName]
		if !ok {
			// Node info not present in the cache; skip this host.
			continue
		}

		neighbors := allPodsByNode[nodeName]

		// Capacity is recorded in milli-units to match MilliValue() below.
		resources := map[corev1.ResourceName]appv1.HostResourceInfo{}
		for name, resource := range node.Capacity {
			info := resources[name]
			info.ResourceName = name
			info.Capacity += resource.MilliValue()
			resources[name] = info
		}

		for _, pod := range appPods {
			for name, resource := range pod.ResourceRequests {
				if !supportedResourceNames[name] {
					continue
				}

				info := resources[name]
				info.RequestedByApp += resource.MilliValue()
				resources[name] = info
			}
		}

		for _, pod := range neighbors {
			for name, resource := range pod.ResourceRequests {
				// Completed pods no longer consume their requests.
				if !supportedResourceNames[name] || pod.Phase == corev1.PodSucceeded || pod.Phase == corev1.PodFailed {
					continue
				}
				info := resources[name]
				info.RequestedByNeighbors += resource.MilliValue()
				resources[name] = info
			}
		}

		var resourcesInfo []appv1.HostResourceInfo
		for _, info := range resources {
			if supportedResourceNames[info.ResourceName] && info.Capacity > 0 {
				resourcesInfo = append(resourcesInfo, info)
			}
		}
		// Deterministic ordering for stable API/UI output.
		sort.Slice(resourcesInfo, func(i, j int) bool {
			return resourcesInfo[i].ResourceName < resourcesInfo[j].ResourceName
		})

		// Expose only node labels that the settings explicitly allow.
		allowedNodeLabels := ctrl.settingsMgr.GetAllowedNodeLabels()
		nodeLabels := make(map[string]string)
		for _, label := range allowedNodeLabels {
			if val, ok := node.Labels[label]; ok {
				nodeLabels[label] = val
			}
		}

		hosts = append(hosts, appv1.HostInfo{Name: nodeName, SystemInfo: node.SystemInfo, ResourcesInfo: resourcesInfo, Labels: nodeLabels})
	}
	ts.AddCheckpoint("process_app_pods_by_node_ms")
	return hosts, nil
}

// hideSecretData converts the comparison result into ResourceDiffs while
// redacting Secret values; for Secrets the diff is recomputed against the
// redacted target/live objects so the redaction stays consistent.
func (ctrl *ApplicationController) hideSecretData(destCluster *appv1.Cluster, app *appv1.Application, comparisonResult *comparisonResult) ([]*appv1.ResourceDiff, error) {
	items := make([]*appv1.ResourceDiff, len(comparisonResult.managedResources))
	for i := range comparisonResult.managedResources {
		res := comparisonResult.managedResources[i]
		item := appv1.ResourceDiff{
			Namespace:       res.Namespace,
			Name:            res.Name,
			Group:           res.Group,
			Kind:            res.Kind,
			Hook:            res.Hook,
			ResourceVersion: res.ResourceVersion,
		}

		target := res.Target
		live := res.Live
		resDiff := res.Diff
		if res.Kind == kube.SecretKind && res.Group == "" {
			var err error
			// Replace secret values with redacted placeholders in both states.
			target, live, err = diff.HideSecretData(res.Target, res.Live, ctrl.settingsMgr.GetSensitiveAnnotations())
			if err != nil {
				return nil, fmt.Errorf("error hiding secret data: %w", err)
			}
			compareOptions, err := ctrl.settingsMgr.GetResourceCompareOptions()
			if err != nil {
				return nil, fmt.Errorf("error getting resource compare options: %w", err)
			}
			resourceOverrides, err := ctrl.settingsMgr.GetResourceOverrides()
			if err != nil {
				return nil, fmt.Errorf("error getting resource overrides: %w", err)
			}
			appLabelKey, err := ctrl.settingsMgr.GetAppInstanceLabelKey()
			if err != nil {
				return nil, fmt.Errorf("error getting app instance label key: %w", err)
			}
			trackingMethod, err := ctrl.settingsMgr.GetTrackingMethod()
			if err != nil {
				return nil, fmt.Errorf("error getting tracking method: %w", err)
			}

			clusterCache, err := ctrl.stateCache.GetClusterCache(destCluster)
			if err != nil {
				return nil, fmt.Errorf("error getting cluster cache: %w", err)
			}
			diffConfig, err := argodiff.NewDiffConfigBuilder().
				WithDiffSettings(app.Spec.IgnoreDifferences, resourceOverrides, compareOptions.IgnoreAggregatedRoles, ctrl.ignoreNormalizerOpts).
				WithTracking(appLabelKey, trackingMethod).
				WithNoCache().
				WithLogger(logutils.NewLogrusLogger(logutils.NewWithCurrentConfig())).
				WithGVKParser(clusterCache.GetGVKParser()).
				Build()
			if err != nil {
				return nil, fmt.Errorf("appcontroller error building diff config: %w", err)
			}

			// Re-run the diff on the redacted objects.
			diffResult, err := argodiff.StateDiff(live, target, diffConfig)
			if err != nil {
				return nil, fmt.Errorf("error applying diff: %w", err)
			}
			resDiff = diffResult
		}

		if live != nil {
			data, err := json.Marshal(live)
			if err != nil {
				return nil, fmt.Errorf("error marshaling live json: %w", err)
			}
			item.LiveState = string(data)
		} else {
			item.LiveState = "null"
		}

		if target != nil {
			data, err := json.Marshal(target)
			if err != nil {
				return nil, fmt.Errorf("error marshaling target json: %w", err)
			}
			item.TargetState = string(data)
		} else {
			item.TargetState = "null"
		}
		item.PredictedLiveState = string(resDiff.PredictedLive)
		item.NormalizedLiveState = string(resDiff.NormalizedLive)
		item.Modified = resDiff.Modified

		items[i] = &item
	}
	return items, nil
}

// Run starts the Application CRD controller.
func (ctrl *ApplicationController) Run(ctx context.Context, statusProcessors int, operationProcessors int) {
	defer runtime.HandleCrash()
	defer ctrl.appRefreshQueue.ShutDown()
	defer ctrl.appComparisonTypeRefreshQueue.ShutDown()
	defer ctrl.appOperationQueue.ShutDown()
	defer ctrl.projectRefreshQueue.ShutDown()
	defer ctrl.appHydrateQueue.ShutDown()
	defer ctrl.hydrationQueue.ShutDown()

	ctrl.RegisterClusterSecretUpdater(ctx)
	ctrl.metricsServer.RegisterClustersInfoSource(ctx, ctrl.stateCache, ctrl.db, ctrl.metricsClusterLabels)

	if ctrl.dynamicClusterDistributionEnabled {
		// only start deployment informer if dynamic distribution is enabled
		go ctrl.deploymentInformer.Informer().Run(ctx.Done())
	}

	// Best-effort sharding init: failures are logged but do not prevent startup.
	clusters, err := ctrl.db.ListClusters(ctx)
	if err != nil {
		log.Warnf("Cannot init sharding. Error while querying clusters list from database: %v", err)
	} else {
		appItems, err := ctrl.getAppList(metav1.ListOptions{})

		if err != nil {
			log.Warnf("Cannot init sharding. Error while querying application list from database: %v", err)
		} else {
			ctrl.clusterSharding.Init(clusters, appItems)
		}
	}

	go ctrl.appInformer.Run(ctx.Done())
	go ctrl.projInformer.Run(ctx.Done())

	errors.CheckError(ctrl.stateCache.Init())

	// Workers must not start before app/project informer caches are synced.
	if !cache.WaitForCacheSync(ctx.Done(), ctrl.appInformer.HasSynced, ctrl.projInformer.HasSynced) {
		log.Error("Timed out waiting for caches to sync")
		return
	}

	go func() { errors.CheckError(ctrl.stateCache.Run(ctx)) }()
	go func() { errors.CheckError(ctrl.metricsServer.ListenAndServe()) }()

	// Worker pools: each goroutine drains its queue until shutdown, restarting
	// every second via wait.Until if the processing loop exits.
	for i := 0; i < statusProcessors; i++ {
		go wait.Until(func() {
			for ctrl.processAppRefreshQueueItem() {
			}
		}, time.Second, ctx.Done())
	}

	for i := 0; i < operationProcessors; i++ {
		go wait.Until(func() {
			for ctrl.processAppOperationQueueItem() {
			}
		}, time.Second, ctx.Done())
	}

	go wait.Until(func() {
		for ctrl.processAppComparisonTypeQueueItem() {
		}
	}, time.Second, ctx.Done())

	go wait.Until(func() {
		for ctrl.processProjectQueueItem() {
		}
	}, time.Second, ctx.Done())

	if ctrl.hydrator != nil {
		go wait.Until(func() {
			for ctrl.processAppHydrateQueueItem() {
			}
		}, time.Second, ctx.Done())

		go wait.Until(func() {
			for ctrl.processHydrationQueueItem() {
			}
		}, time.Second, ctx.Done())
	}

	<-ctx.Done()
}

// requestAppRefresh adds a request for given app to the refresh queue. appName
// needs to be the qualified name of the application, i.e. <namespace>/<name>.
func (ctrl *ApplicationController) requestAppRefresh(appName string, compareWith *CompareWith, after *time.Duration) {
	key := ctrl.toAppKey(appName)

	if compareWith != nil && after != nil {
		// Delayed refresh with an explicit comparison level: encode the level
		// into the queue key and schedule it on the comparison-type queue.
		ctrl.appComparisonTypeRefreshQueue.AddAfter(fmt.Sprintf("%s/%d", key, *compareWith), *after)
	} else {
		if compareWith != nil {
			ctrl.refreshRequestedAppsMutex.Lock()
			// Keep the strongest comparison level requested so far for this app.
			ctrl.refreshRequestedApps[key] = compareWith.Max(ctrl.refreshRequestedApps[key])
			ctrl.refreshRequestedAppsMutex.Unlock()
		}
		if after != nil {
			ctrl.appRefreshQueue.AddAfter(key, *after)
		} else {
			ctrl.appRefreshQueue.AddRateLimited(key)
		}
	}
}

// isRefreshRequested reports whether a refresh was requested for the given app
// key and, if so, consumes the request and returns the requested comparison level.
func (ctrl *ApplicationController) isRefreshRequested(appName string) (bool, CompareWith) {
	ctrl.refreshRequestedAppsMutex.Lock()
	defer ctrl.refreshRequestedAppsMutex.Unlock()
	level, ok := ctrl.refreshRequestedApps[appName]
	if ok {
		delete(ctrl.refreshRequestedApps, appName)
	}
	return ok, level
}

// processAppOperationQueueItem takes one item off the operation queue and either
// runs the app's requested operation or finalizes its deletion. Returns false
// only when the queue has been shut down.
func (ctrl *ApplicationController) processAppOperationQueueItem() (processNext bool) {
	appKey, shutdown := ctrl.appOperationQueue.Get()
	if shutdown {
		processNext = false
		return
	}
	processNext = true
	defer func() {
		if r := recover(); r != nil {
			log.Errorf("Recovered from panic: %+v\n%s", r, debug.Stack())
		}
		// Done must always be called to release the key back to the queue.
		ctrl.appOperationQueue.Done(appKey)
	}()

	obj, exists, err := ctrl.appInformer.GetIndexer().GetByKey(appKey)
	if err != nil {
		log.Errorf("Failed to get application '%s' from informer index: %+v", appKey, err)
		return
	}
	if !exists {
		// This happens after app was deleted, but the work queue still had an entry for it.
		return
	}
	origApp, ok := obj.(*appv1.Application)
	if !ok {
		log.Warnf("Key '%s' in index is not an application", appKey)
		return
	}
	app := origApp.DeepCopy()
	logCtx := log.WithFields(applog.GetAppLogFields(app))
	ts := stats.NewTimingStats()
	defer func() {
		for k, v := range ts.Timings() {
			logCtx = logCtx.WithField(k, v.Milliseconds())
		}
		logCtx = logCtx.WithField("time_ms", time.Since(ts.StartTime).Milliseconds())
		logCtx.Debug("Finished processing app operation queue item")
	}()

	if app.Operation != nil {
		// If we get here, we are about to process an operation, but we cannot rely on informer since it might have stale data.
		// So always retrieve the latest version to ensure it is not stale to avoid unnecessary syncing.
		// We cannot rely on informer since applications might be updated by both application controller and api server.
		freshApp, err := ctrl.applicationClientset.ArgoprojV1alpha1().Applications(app.ObjectMeta.Namespace).Get(context.Background(), app.Name, metav1.GetOptions{})
		if err != nil {
			logCtx.Errorf("Failed to retrieve latest application state: %v", err)
			return
		}
		app = freshApp
	}
	ts.AddCheckpoint("get_fresh_app_ms")

	if app.Operation != nil {
		ctrl.processRequestedAppOperation(app)
		ts.AddCheckpoint("process_requested_app_operation_ms")
	} else if app.DeletionTimestamp != nil {
		if err = ctrl.finalizeApplicationDeletion(app, func(project string) ([]*appv1.Cluster, error) {
			return ctrl.db.GetProjectClusters(context.Background(), project)
		}); err != nil {
			ctrl.setAppCondition(app, appv1.ApplicationCondition{
				Type:    appv1.ApplicationConditionDeletionError,
				Message: err.Error(),
			})
			message := fmt.Sprintf("Unable to delete application resources: %v", err.Error())
			ctrl.logAppEvent(context.TODO(), app, argo.EventInfo{Reason: argo.EventReasonStatusRefreshed, Type: corev1.EventTypeWarning}, message)
		}
		ts.AddCheckpoint("finalize_application_deletion_ms")
	}
	return
}

// processAppComparisonTypeQueueItem converts one "namespace/name/comparisonType"
// queue entry into a refresh request with the encoded comparison level.
func (ctrl *ApplicationController) processAppComparisonTypeQueueItem() (processNext bool) {
	key, shutdown := ctrl.appComparisonTypeRefreshQueue.Get()
	processNext = true

	defer func() {
		if r := recover(); r != nil {
			log.Errorf("Recovered from panic: %+v\n%s", r, debug.Stack())
		}
		ctrl.appComparisonTypeRefreshQueue.Done(key)
	}()
	if shutdown {
		processNext = false
		return
	}

	if parts := strings.Split(key, "/"); len(parts) != 3 {
		log.Warnf("Unexpected key format in appComparisonTypeRefreshTypeQueue. Key should consists of namespace/name/comparisonType but got: %s", key)
	} else {
		compareWith, err := strconv.Atoi(parts[2])
		if err != nil {
			log.Warnf("Unable to parse comparison type: %v", err)
			return
		}
		ctrl.requestAppRefresh(ctrl.toAppQualifiedName(parts[1], parts[0]), CompareWith(compareWith).Pointer(), nil)
	}
	return
}

// processProjectQueueItem handles one project key from the project refresh
// queue, finalizing project deletion when the project is being deleted.
func (ctrl *ApplicationController) processProjectQueueItem() (processNext bool) {
	key, shutdown := ctrl.projectRefreshQueue.Get()
	processNext = true

	defer func() {
		if r := recover(); r != nil {
			log.Errorf("Recovered from panic: %+v\n%s", r, debug.Stack())
		}
		ctrl.projectRefreshQueue.Done(key)
	}()
	if shutdown {
		processNext = false
		return
	}
	obj, exists, err := ctrl.projInformer.GetIndexer().GetByKey(key)
	if err != nil {
		log.Errorf("Failed to get project '%s' from informer index: %+v", key, err)
		return
	}
	if !exists {
		// This happens after appproj was deleted, but the work queue still had an entry for it.
		return
	}
	origProj, ok := obj.(*appv1.AppProject)
	if !ok {
		log.Warnf("Key '%s' in index is not an appproject", key)
		return
	}

	if origProj.DeletionTimestamp != nil && origProj.HasFinalizer() {
		if err := ctrl.finalizeProjectDeletion(origProj.DeepCopy()); err != nil {
			log.Warnf("Failed to finalize project deletion: %v", err)
		}
	}
	return
}

// finalizeProjectDeletion removes the project's finalizer once no application
// in the controller's namespace references the project anymore.
func (ctrl *ApplicationController) finalizeProjectDeletion(proj *appv1.AppProject) error {
	apps, err := ctrl.appLister.Applications(ctrl.namespace).List(labels.Everything())
	if err != nil {
		return fmt.Errorf("error listing applications: %w", err)
	}
	appsCount := 0
	for i := range apps {
		if apps[i].Spec.GetProject() == proj.Name {
			appsCount++
		}
	}
	if appsCount == 0 {
		return ctrl.removeProjectFinalizer(proj)
	}
	log.Infof("Cannot remove project '%s' finalizer as is referenced by %d applications", proj.Name, appsCount)
	return nil
}

// removeProjectFinalizer strips the finalizer from the project and patches the
// updated finalizer list via a merge patch.
func (ctrl *ApplicationController) removeProjectFinalizer(proj *appv1.AppProject) error {
	proj.RemoveFinalizer()
	var patch []byte
	patch, _ = json.Marshal(map[string]any{
		"metadata": map[string]any{
			"finalizers": proj.Finalizers,
		},
	})
	_, err := ctrl.applicationClientset.ArgoprojV1alpha1().AppProjects(ctrl.namespace).Patch(context.Background(), proj.Name, types.MergePatchType, patch, metav1.PatchOptions{})
	return err
}

// shouldBeDeleted returns whether a given resource obj should be deleted on cascade delete of application app
func (ctrl *ApplicationController) shouldBeDeleted(app *appv1.Application, obj *unstructured.Unstructured) bool {
	// CRDs, the app itself, and resources opting out via sync-option or Helm
	// resource-policy annotations are never cascade-deleted.
	return !kube.IsCRD(obj) && !isSelfReferencedApp(app, kube.GetObjectRef(obj)) &&
		!resourceutil.HasAnnotationOption(obj, synccommon.AnnotationSyncOptions, synccommon.SyncOptionDisableDeletion) &&
		!resourceutil.HasAnnotationOption(obj, helm.ResourcePolicyAnnotation, helm.ResourcePolicyKeep)
}

// getPermittedAppLiveObjects returns the app's managed live objects, excluding
// any resource the app's project does not permit.
func (ctrl *ApplicationController) getPermittedAppLiveObjects(destCluster *appv1.Cluster, app *appv1.Application, proj *appv1.AppProject, projectClusters func(project string) ([]*appv1.Cluster, error)) (map[kube.ResourceKey]*unstructured.Unstructured, error) {
	objsMap, err := ctrl.stateCache.GetManagedLiveObjs(destCluster, app, []*unstructured.Unstructured{})
	if err != nil {
		return nil, err
	}
	// Don't delete live resources which are not permitted in the app project
	for k, v := range objsMap {
		permitted, err := proj.IsLiveResourcePermitted(v, destCluster, projectClusters)
		if err != nil {
			return nil, err
		}

		if !permitted {
			delete(objsMap, k)
		}
	}
	return objsMap, nil
}

// finalizeApplicationDeletion drives the deletion finalizers of an application:
// cascaded resource deletion, post-delete hooks, hook cleanup, and finally
// cache/queue cleanup. It returns nil when deletion is still in progress and
// will be resumed on a later queue item.
func (ctrl *ApplicationController) finalizeApplicationDeletion(app *appv1.Application, projectClusters func(project string) ([]*appv1.Cluster, error)) error {
	logCtx := log.WithFields(applog.GetAppLogFields(app))
	// Get refreshed application info, since informer app copy might be stale
	app, err := ctrl.applicationClientset.ArgoprojV1alpha1().Applications(app.Namespace).Get(context.Background(), app.Name, metav1.GetOptions{})
	if err != nil {
		if !apierrors.IsNotFound(err) {
			logCtx.Errorf("Unable to get refreshed application info prior deleting resources: %v", err)
		}
		return nil
	}
	proj, err := ctrl.getAppProj(app)
	if err != nil {
		return err
	}
	destCluster, err := argo.GetDestinationCluster(context.Background(), app.Spec.Destination, ctrl.db)
	if err != nil {
		// Destination cluster is unknown: nothing can be deleted there, so drop
		// the deletion-related finalizers and let the app go away.
		logCtx.Warnf("Unable to get destination cluster: %v", err)
		app.UnSetCascadedDeletion()
		app.UnSetPostDeleteFinalizerAll()
		if err := ctrl.updateFinalizers(app); err != nil {
			return err
		}
		logCtx.Infof("Resource entries removed from undefined cluster")
		return nil
	}
	clusterRESTConfig, err := destCluster.RESTConfig()
	if err != nil {
		return err
	}
	config := metrics.AddMetricsTransportWrapper(ctrl.metricsServer, app, clusterRESTConfig)

	if app.CascadedDeletion() {
		deletionApproved := app.IsDeletionConfirmed(app.DeletionTimestamp.Time)

		logCtx.Infof("Deleting resources")
		// ApplicationDestination points to a valid cluster, so we may clean up the live objects
		objs := make([]*unstructured.Unstructured, 0)
		objsMap, err := ctrl.getPermittedAppLiveObjects(destCluster, app, proj, projectClusters)
		if err != nil {
			return err
		}

		for k := range objsMap {
			// Wait for objects pending deletion to complete before proceeding with next sync wave
			if objsMap[k].GetDeletionTimestamp() != nil {
				logCtx.Infof("%d objects remaining for deletion", len(objsMap))
				return nil
			}

			if ctrl.shouldBeDeleted(app, objsMap[k]) {
				objs = append(objs, objsMap[k])
				if res, ok := app.Status.FindResource(k); ok && res.RequiresDeletionConfirmation && !deletionApproved {
					logCtx.Infof("Resource %v requires manual confirmation to delete", k)
					return nil
				}
			}
		}

		filteredObjs := FilterObjectsForDeletion(objs)

		propagationPolicy := metav1.DeletePropagationForeground
		if app.GetPropagationPolicy() == appv1.BackgroundPropagationPolicyFinalizer {
			propagationPolicy = metav1.DeletePropagationBackground
		}
		logCtx.Infof("Deleting application's resources with %s propagation policy", propagationPolicy)

		err = kube.RunAllAsync(len(filteredObjs), func(i int) error {
			obj := filteredObjs[i]
			return ctrl.kubectl.DeleteResource(context.Background(), config, obj.GroupVersionKind(), obj.GetName(), obj.GetNamespace(), metav1.DeleteOptions{PropagationPolicy: &propagationPolicy})
		})
		if err != nil {
			return err
		}

		// Re-read the live objects to verify everything we intended to delete
		// is actually gone before removing the finalizer.
		objsMap, err = ctrl.getPermittedAppLiveObjects(destCluster, app, proj, projectClusters)
		if err != nil {
			return err
		}

		for k, obj := range objsMap {
			if !ctrl.shouldBeDeleted(app, obj) {
				delete(objsMap, k)
			}
		}
		if len(objsMap) > 0 {
			logCtx.Infof("%d objects remaining for deletion", len(objsMap))
			return nil
		}
		logCtx.Infof("Successfully deleted %d resources", len(objs))
		app.UnSetCascadedDeletion()
		return ctrl.updateFinalizers(app)
	}

	if app.HasPostDeleteFinalizer() {
		objsMap, err := ctrl.getPermittedAppLiveObjects(destCluster, app, proj, projectClusters)
		if err != nil {
			return err
		}

		done, err := ctrl.executePostDeleteHooks(app, proj, objsMap, config, logCtx)
		if err != nil {
			return err
		}
		if !done {
			return nil
		}
		app.UnSetPostDeleteFinalizer()
		return ctrl.updateFinalizers(app)
	}

	if app.HasPostDeleteFinalizer("cleanup") {
		objsMap, err := ctrl.getPermittedAppLiveObjects(destCluster, app, proj, projectClusters)
		if err != nil {
			return err
		}

		done, err := ctrl.cleanupPostDeleteHooks(objsMap, config, logCtx)
		if err != nil {
			return err
		}
		if !done {
			return nil
		}
		app.UnSetPostDeleteFinalizer("cleanup")
		return ctrl.updateFinalizers(app)
	}

	if !app.CascadedDeletion() && !app.HasPostDeleteFinalizer() {
		// All finalizers handled: clear cached state and let the project queue
		// re-evaluate whether the project finalizer can now be removed.
		if err := ctrl.cache.SetAppManagedResources(app.Name, nil); err != nil {
			return err
		}

		if err := ctrl.cache.SetAppResourcesTree(app.Name, nil); err != nil {
			return err
		}
		ctrl.projectRefreshQueue.Add(fmt.Sprintf("%s/%s", ctrl.namespace, app.Spec.GetProject()))
	}

	return nil
}

// updateFinalizers patches the application's finalizer list via a merge patch.
// The project lookup up front surfaces a project error before patching.
func (ctrl *ApplicationController) updateFinalizers(app *appv1.Application) error {
	_, err := ctrl.getAppProj(app)
	if err != nil {
		return fmt.Errorf("error getting project: %w", err)
	}

	var patch []byte
	patch, _ = json.Marshal(map[string]any{
		"metadata": map[string]any{
			"finalizers": app.Finalizers,
		},
	})

	_, err = ctrl.applicationClientset.ArgoprojV1alpha1().Applications(app.Namespace).Patch(context.Background(), app.Name, types.MergePatchType, patch, metav1.PatchOptions{})
	return err
}

// setAppCondition sets a single condition of the given type on the app status
// and patches it, skipping the patch if an identical condition already exists.
func (ctrl *ApplicationController) setAppCondition(app *appv1.Application, condition appv1.ApplicationCondition) {
	logCtx := log.WithFields(applog.GetAppLogFields(app))
	// do nothing if app already has same condition
	for _, c := range app.Status.Conditions {
		if c.Message == condition.Message && c.Type == condition.Type {
			return
		}
	}

	app.Status.SetConditions([]appv1.ApplicationCondition{condition}, map[appv1.ApplicationConditionType]bool{condition.Type: true})

	var patch []byte
	patch, err := json.Marshal(map[string]any{
		"status": map[string]any{
			"conditions": app.Status.Conditions,
		},
	})
	if err == nil {
		_, err = ctrl.applicationClientset.ArgoprojV1alpha1().Applications(app.Namespace).Patch(context.Background(), app.Name, types.MergePatchType, patch, metav1.PatchOptions{})
	}
	if err != nil {
		logCtx.Errorf("Unable to set application condition: %v", err)
	}
}

// processRequestedAppOperation starts, resumes, retries, or terminates the
// operation recorded on the application and persists the resulting state.
func (ctrl *ApplicationController) processRequestedAppOperation(app *appv1.Application) {
	logCtx := log.WithFields(applog.GetAppLogFields(app))
	var state *appv1.OperationState
	// Recover from any unexpected panics and automatically set the status to be failed
	defer func() {
		if r := recover(); r != nil {
			logCtx.Errorf("Recovered from panic: %+v\n%s", r, debug.Stack())
			state.Phase = synccommon.OperationError
			if rerr, ok := r.(error); ok {
				state.Message = rerr.Error()
			} else {
				state.Message = fmt.Sprintf("%v", r)
			}
			ctrl.setOperationState(app, state)
		}
	}()
	ts := stats.NewTimingStats()
	defer func() {
		for k, v := range ts.Timings() {
			logCtx = logCtx.WithField(k, v.Milliseconds())
		}
		logCtx = logCtx.WithField("time_ms", time.Since(ts.StartTime).Milliseconds())
		logCtx.Debug("Finished processing requested app operation")
	}()
	terminatingCause := ""
	if isOperationInProgress(app) {
		state = app.Status.OperationState.DeepCopy()
		switch {
		case state.Phase == synccommon.OperationTerminating:
			logCtx.Infof("Resuming in-progress operation. phase: %s, message: %s", state.Phase, state.Message)
		case ctrl.syncTimeout != time.Duration(0) && time.Now().After(state.StartedAt.Add(ctrl.syncTimeout)):
			// Operation ran past the configured sync timeout: flip it to
			// Terminating so the next SyncAppState pass performs cleanup.
			state.Phase = synccommon.OperationTerminating
			state.Message = "operation is terminating due to timeout"
			terminatingCause = "controller sync timeout"
			ctrl.setOperationState(app, state)
			logCtx.Infof("Terminating in-progress operation due to timeout. Started at: %v, timeout: %v", state.StartedAt, ctrl.syncTimeout)
		case state.Phase == synccommon.OperationRunning && state.FinishedAt != nil:
			// Failed operation with retry strategy might be in-progress and has completion time
			retryAt, err := app.Status.OperationState.Operation.Retry.NextRetryAt(state.FinishedAt.Time, state.RetryCount)
			if err != nil {
				state.Phase = synccommon.OperationError
				state.Message = err.Error()
				ctrl.setOperationState(app, state)
				return
			}
			retryAfter := time.Until(retryAt)

			if retryAfter > 0 {
				logCtx.Infof("Skipping retrying in-progress operation. Attempting again at: %s", retryAt.Format(time.RFC3339))
				ctrl.requestAppRefresh(app.QualifiedName(), CompareWithLatest.Pointer(), &retryAfter)
				return
			}

			// Remove the desired revisions if the sync failed and we are retrying. The latest revision from the source will be used.
			extraMsg := ""
			if state.Operation.Retry.Refresh {
				extraMsg += " with latest revisions"
				state.Operation.Sync.Revision = ""
				state.Operation.Sync.Revisions = nil
			}

			// Get rid of sync results and null out previous operation completion time
			// This will start the retry attempt
			state.Message = fmt.Sprintf("Retrying operation%s. Attempt #%d", extraMsg, state.RetryCount)
			state.FinishedAt = nil
			state.SyncResult = nil
			ctrl.setOperationState(app, state)
			logCtx.Infof("Retrying operation%s. Attempt #%d", extraMsg, state.RetryCount)
		default:
			logCtx.Infof("Resuming in-progress operation. phase: %s, message: %s", state.Phase, state.Message)
		}
	} else {
		state = NewOperationState(*app.Operation)
		ctrl.setOperationState(app, state)
		if ctrl.syncTimeout != time.Duration(0) {
			// Schedule a check during which the timeout would be checked.
			ctrl.appOperationQueue.AddAfter(ctrl.toAppKey(app.QualifiedName()), ctrl.syncTimeout)
		}
		logCtx.Infof("Initialized new operation: %v", *app.Operation)
	}
	ts.AddCheckpoint("initial_operation_stage_ms")

	terminating := state.Phase == synccommon.OperationTerminating
	project, err := ctrl.getAppProj(app)
	if err == nil {
		// Start or resume the sync
		ctrl.appStateManager.SyncAppState(app, project, state)
	} else {
		state.Phase = synccommon.OperationError
		state.Message = fmt.Sprintf("Failed to load application project: %v", err)
	}
	ts.AddCheckpoint("sync_app_state_ms")

	switch state.Phase {
	case synccommon.OperationRunning:
		// It's possible for an app to be terminated while we were operating on it. We do not want
		// to clobber the Terminated state with Running. Get the latest app state to check for this.
		freshApp, err := ctrl.applicationClientset.ArgoprojV1alpha1().Applications(app.Namespace).Get(context.Background(), app.Name, metav1.GetOptions{})
		if err == nil {
			if freshApp.Status.OperationState != nil && freshApp.Status.OperationState.Phase == synccommon.OperationTerminating {
				state.Phase = synccommon.OperationTerminating
				state.Message = "operation is terminating"
				// after this, we will get requeued to the workqueue, but next time the
				// SyncAppState will operate in a Terminating phase, allowing the worker to perform
				// cleanup (e.g. delete jobs, workflows, etc...)
			}
		}
	case synccommon.OperationFailed, synccommon.OperationError:
		// A negative retry limit means unlimited retries.
		if !terminating && (state.RetryCount < state.Operation.Retry.Limit || state.Operation.Retry.Limit < 0) {
			now := metav1.Now()
			if retryAt, err := state.Operation.Retry.NextRetryAt(now.Time, state.RetryCount); err != nil {
				state.Phase = synccommon.OperationError
				state.Message = fmt.Sprintf("%s (failed to retry: %v)", state.Message, err)
			} else {
				// Set FinishedAt explicitly on a Running phase. This is a unique condition that will allow this
				// function to perform a retry the next time the operation is processed.
				state.Phase = synccommon.OperationRunning
				state.FinishedAt = &now
				state.RetryCount++
				state.Message = fmt.Sprintf("%s. Retrying attempt #%d at %s.", state.Message, state.RetryCount, retryAt.Format(time.Kitchen))
			}
		} else {
			if terminating && terminatingCause != "" {
				state.Message = fmt.Sprintf("%s, triggered by %s", state.Message, terminatingCause)
			}
			if state.RetryCount > 0 {
				state.Message = fmt.Sprintf("%s (retried %d times).", state.Message, state.RetryCount)
			}
		}
	}

	ctrl.setOperationState(app, state)
	ts.AddCheckpoint("final_set_operation_state")
	if state.Phase.Completed() && (app.Operation.Sync != nil && !app.Operation.Sync.DryRun) {
		// if we just completed an operation, force a refresh so that UI will report up-to-date
		// sync/health information
		if _, err := cache.MetaNamespaceKeyFunc(app); err == nil {
			// force app refresh with using CompareWithLatest comparison type and trigger app reconciliation loop
			ctrl.requestAppRefresh(app.QualifiedName(), CompareWithLatestForceResolve.Pointer(), nil)
		} else {
			logCtx.Warnf("Fails to requeue application: %v", err)
		}
	}
	ts.AddCheckpoint("request_app_refresh_ms")
}

// setOperationState persists the given operation state onto the application via
// a merge patch (retried until it succeeds), and on completion clears the
// operation field, emits an event, and records sync metrics.
func (ctrl *ApplicationController) setOperationState(app *appv1.Application, state *appv1.OperationState) {
	logCtx := log.WithFields(applog.GetAppLogFields(app))
	if state.Phase == "" {
		// expose any bugs where we neglect to set phase
		panic("no phase was set")
	}
	if state.Phase.Completed() {
		now := metav1.Now()
		state.FinishedAt = &now
	}
	patch := map[string]any{
		"status": map[string]any{
			"operationState": state,
		},
	}
	if state.Phase.Completed() {
		// If operation is completed, clear the operation field to indicate no operation is
		// in progress.
		patch["operation"] = nil
	}
	if reflect.DeepEqual(app.Status.OperationState, state) {
		logCtx.Infof("No operation updates necessary to '%s'. Skipping patch", app.QualifiedName())
		return
	}
	patchJSON, err := json.Marshal(patch)
	if err != nil {
		logCtx.Errorf("error marshaling json: %v", err)
		return
	}
	if app.Status.OperationState != nil && app.Status.OperationState.FinishedAt != nil && state.FinishedAt == nil {
		// Merge patches drop nil fields on marshal, so explicitly null out
		// finishedAt when transitioning back to an unfinished state (retry).
		patchJSON, err = jsonpatch.MergeMergePatches(patchJSON, []byte(`{"status": {"operationState": {"finishedAt": null}}}`))
		if err != nil {
			logCtx.Errorf("error merging operation state patch: %v", err)
			return
		}
	}

	kube.RetryUntilSucceed(context.Background(), updateOperationStateTimeout, "Update application operation state", logutils.NewLogrusLogger(logutils.NewWithCurrentConfig()), func() error {
		_, err := ctrl.PatchAppWithWriteBack(context.Background(), app.Name, app.Namespace, types.MergePatchType, patchJSON, metav1.PatchOptions{})
		if err != nil {
			// Stop retrying updating deleted application
			if apierrors.IsNotFound(err) {
				return nil
			}
			// kube.RetryUntilSucceed logs failed attempts at "debug" level, but we want to know if this fails. Log a
			// warning.
			logCtx.Warnf("error patching application with operation state: %v", err)
			return fmt.Errorf("error patching application with operation state: %w", err)
		}
		return nil
	})

	logCtx.Infof("updated '%s' operation (phase: %s)", app.QualifiedName(), state.Phase)
	if state.Phase.Completed() {
		eventInfo := argo.EventInfo{Reason: argo.EventReasonOperationCompleted}
		var messages []string
		if state.Operation.Sync != nil && len(state.Operation.Sync.Resources) > 0 {
			messages = []string{"Partial sync operation"}
		} else {
			messages = []string{"Sync operation"}
		}
		if state.SyncResult != nil {
			messages = append(messages, "to", state.SyncResult.Revision)
		}
		if state.Phase.Successful() {
			eventInfo.Type = corev1.EventTypeNormal
			messages = append(messages, "succeeded")
		} else {
			eventInfo.Type = corev1.EventTypeWarning
			messages = append(messages, "failed:", state.Message)
		}
		ctrl.logAppEvent(context.TODO(), app, eventInfo, strings.Join(messages, " "))

		destCluster, err := argo.GetDestinationCluster(context.Background(), app.Spec.Destination, ctrl.db)
		if err != nil {
			logCtx.Warnf("Unable to get destination cluster, setting dest_server label to empty string in sync metric: %v", err)
		}
		destServer := ""
		if destCluster != nil {
			destServer = destCluster.Server
		}
		ctrl.metricsServer.IncSync(app, destServer, state)
		ctrl.metricsServer.IncAppSyncDuration(app, destServer, state)
	}
}

// writeBackToInformer writes a just recently updated App back into the informer cache.
// This prevents the situation where the controller operates on a stale app and repeats work
func (ctrl *ApplicationController) writeBackToInformer(app *appv1.Application) {
	logCtx := log.WithFields(applog.GetAppLogFields(app)).WithField("informer-writeBack", true)
	err := ctrl.appInformer.GetStore().Update(app)
	if err != nil {
		logCtx.Errorf("failed to update informer store: %v", err)
		return
	}
}

// PatchAppWithWriteBack patches an application and writes it back to the informer cache
func (ctrl *ApplicationController) PatchAppWithWriteBack(ctx context.Context, name, ns string, pt types.PatchType, data []byte, opts metav1.PatchOptions, subresources ...string) (result *appv1.Application, err error) {
	patchedApp, err := ctrl.applicationClientset.ArgoprojV1alpha1().Applications(ns).Patch(ctx, name, pt, data, opts, subresources...)
	if err != nil {
		return patchedApp, err
	}
	ctrl.writeBackToInformer(patchedApp)
	return patchedApp, err
}

// processAppRefreshQueueItem pops one key off the app refresh queue and, when the app needs
// refreshing, reconciles it: compares live vs. target state, runs auto-sync (if allowed by
// sync windows), and persists the new status. It returns false only when the queue is
// shutting down; all other outcomes return true so the worker keeps processing.
func (ctrl *ApplicationController) processAppRefreshQueueItem() (processNext bool) {
	patchDuration := time.Duration(0) // time spent in doing patch/update calls
	setOpDuration := time.Duration(0) // time spent in doing Operation patch calls in autosync
	appKey, shutdown := ctrl.appRefreshQueue.Get()
	if shutdown {
		processNext = false
		return
	}
	processNext = true
	defer func() {
		if r := recover(); r != nil {
			log.Errorf("Recovered from panic: %+v\n%s", r, debug.Stack())
		}
		// We want to have app operation update happen after the sync, so there's no race condition
		// and app updates not proceeding. See https://github.com/argoproj/argo-cd/issues/18500.
		ctrl.appOperationQueue.AddRateLimited(appKey)
		ctrl.appRefreshQueue.Done(appKey)
	}()
	obj, exists, err := ctrl.appInformer.GetIndexer().GetByKey(appKey)
	if err != nil {
		log.Errorf("Failed to get application '%s' from informer index: %+v", appKey, err)
		return
	}
	if !exists {
		// This happens after app was deleted, but the work queue still had an entry for it.
		return
	}
	origApp, ok := obj.(*appv1.Application)
	if !ok {
		log.Warnf("Key '%s' in index is not an application", appKey)
		return
	}
	origApp = origApp.DeepCopy()
	needRefresh, refreshType, comparisonLevel := ctrl.needRefreshAppStatus(origApp, ctrl.statusRefreshTimeout, ctrl.statusHardRefreshTimeout)

	if !needRefresh {
		return
	}
	// Work on a copy; origApp stays untouched so it can be diffed against the new status later.
	app := origApp.DeepCopy()
	logCtx := log.WithFields(applog.GetAppLogFields(app)).WithFields(log.Fields{
		"comparison-level": comparisonLevel,
		"dest-server":      origApp.Spec.Destination.Server,
		"dest-name":        origApp.Spec.Destination.Name,
		"dest-namespace":   origApp.Spec.Destination.Namespace,
	})

	startTime := time.Now()
	ts := stats.NewTimingStats()
	var destCluster *appv1.Cluster
	defer func() {
		reconcileDuration := time.Since(startTime)

		// We may or may not get to the point in the code where destCluster is set. Populate the dest_server label on a
		// best-effort basis.
		destServer := ""
		if destCluster != nil {
			destServer = destCluster.Server
		}
		ctrl.metricsServer.IncReconcile(origApp, destServer, reconcileDuration)
		for k, v := range ts.Timings() {
			logCtx = logCtx.WithField(k, v.Milliseconds())
		}
		logCtx.WithFields(log.Fields{
			"time_ms":  reconcileDuration.Milliseconds(),
			"patch_ms": patchDuration.Milliseconds(),
			"setop_ms": setOpDuration.Milliseconds(),
		}).Info("Reconciliation completed")
	}()

	// Lightweight path: refresh only the resource tree from cached managed resources,
	// skipping the full target-vs-live comparison.
	if comparisonLevel == ComparisonWithNothing {
		// If the destination cluster is invalid, fallback to the normal reconciliation flow
		if destCluster, err = argo.GetDestinationCluster(context.Background(), app.Spec.Destination, ctrl.db); err == nil {
			managedResources := make([]*appv1.ResourceDiff, 0)
			if err := ctrl.cache.GetAppManagedResources(app.InstanceName(ctrl.namespace), &managedResources); err == nil {
				var tree *appv1.ApplicationTree
				if tree, err = ctrl.getResourceTree(destCluster, app, managedResources); err == nil {
					app.Status.Summary = tree.GetSummary(app)
					if err := ctrl.cache.SetAppResourcesTree(app.InstanceName(ctrl.namespace), tree); err != nil {
						logCtx.Errorf("Failed to cache resources tree: %v", err)
						return
					}
				}

				patchDuration = ctrl.persistAppStatus(origApp, &app.Status)
				return
			}
			logCtx.Warnf("Failed to get cached managed resources for tree reconciliation, fall back to full reconciliation")
		}
	}
	ts.AddCheckpoint("comparison_with_nothing_ms")

	project, hasErrors := ctrl.refreshAppConditions(app)
	ts.AddCheckpoint("refresh_app_conditions_ms")
	now := metav1.Now()
	if hasErrors {
		// Validation failed: mark sync/health unknown, clear cached trees, and stop here.
		app.Status.Sync.Status = appv1.SyncStatusCodeUnknown
		app.Status.Health.Status = health.HealthStatusUnknown
		patchDuration = ctrl.persistAppStatus(origApp, &app.Status)

		if err := ctrl.cache.SetAppResourcesTree(app.InstanceName(ctrl.namespace), &appv1.ApplicationTree{}); err != nil {
			logCtx.Warnf("failed to set app resource tree: %v", err)
		}
		if err := ctrl.cache.SetAppManagedResources(app.InstanceName(ctrl.namespace), nil); err != nil {
			logCtx.Warnf("failed to set app managed resources tree: %v", err)
		}
		ts.AddCheckpoint("process_refresh_app_conditions_errors_ms")
		return
	}

	destCluster, err = argo.GetDestinationCluster(context.Background(), app.Spec.Destination, ctrl.db)
	if err != nil {
		logCtx.Errorf("Failed to get destination cluster: %v", err)
		// exit the reconciliation. ctrl.refreshAppConditions should have caught the error
		return
	}

	var localManifests []string
	if opState := app.Status.OperationState; opState != nil && opState.Operation.Sync != nil {
		localManifests = opState.Operation.Sync.Manifests
	}

	revisions := make([]string, 0)
	sources := make([]appv1.ApplicationSource, 0)

	hasMultipleSources := app.Spec.HasMultipleSources()

	// If we have multiple sources, we use all the sources under `sources` field and ignore source under `source` field.
	// else we use the source under the source field.
	if hasMultipleSources {
		for _, source := range app.Spec.Sources {
			// We do not perform any filtering of duplicate sources.
			// Argo CD will apply and update the resources generated from the sources automatically
			// based on the order in which manifests were generated
			sources = append(sources, source)
			revisions = append(revisions, source.TargetRevision)
		}
		if comparisonLevel == CompareWithRecent {
			revisions = app.Status.Sync.Revisions
		}
	} else {
		revision := app.Spec.GetSource().TargetRevision
		if comparisonLevel == CompareWithRecent {
			revision = app.Status.Sync.Revision
		}
		revisions = append(revisions, revision)
		sources = append(sources, app.Spec.GetSource())
	}

	compareResult, err := ctrl.appStateManager.CompareAppState(app, project, revisions, sources, refreshType == appv1.RefreshTypeHard, comparisonLevel == CompareWithLatestForceResolve, localManifests, hasMultipleSources)

	ts.AddCheckpoint("compare_app_state_ms")

	if stderrors.Is(err, ErrCompareStateRepo) {
		logCtx.Warnf("Ignoring temporary failed attempt to compare app state against repo: %v", err)
		return // short circuit if git error is encountered
	}

	for k, v := range compareResult.timings {
		logCtx = logCtx.WithField(k, v.Milliseconds())
	}

	ctrl.normalizeApplication(origApp, app)
	ts.AddCheckpoint("normalize_application_ms")

	tree, err := ctrl.setAppManagedResources(destCluster, app, compareResult)
	ts.AddCheckpoint("set_app_managed_resources_ms")
	if err != nil {
		logCtx.Errorf("Failed to cache app resources: %v", err)
	} else {
		app.Status.Summary = tree.GetSummary(app)
	}

	// Auto-sync only runs when the project's sync windows currently allow syncing.
	canSync, _ := project.Spec.SyncWindows.Matches(app).CanSync(false)
	if canSync {
		syncErrCond, opDuration := ctrl.autoSync(app, compareResult.syncStatus, compareResult.resources, compareResult.revisionsMayHaveChanges)
		setOpDuration = opDuration
		if syncErrCond != nil {
			app.Status.SetConditions(
				[]appv1.ApplicationCondition{*syncErrCond},
				map[appv1.ApplicationConditionType]bool{appv1.ApplicationConditionSyncError: true},
			)
		} else {
			// Clear any previous sync-error condition.
			app.Status.SetConditions(
				[]appv1.ApplicationCondition{},
				map[appv1.ApplicationConditionType]bool{appv1.ApplicationConditionSyncError: true},
			)
		}
	} else {
		logCtx.Info("Sync prevented by sync window")
	}
	ts.AddCheckpoint("auto_sync_ms")

	if app.Status.ReconciledAt == nil || comparisonLevel >= CompareWithLatest {
		app.Status.ReconciledAt = &now
	}
	app.Status.Sync = *compareResult.syncStatus
	app.Status.Health.Status = compareResult.healthStatus
	app.Status.Resources = compareResult.resources
	// Sort resources for a deterministic status (group/kind/namespace/name order).
	sort.Slice(app.Status.Resources, func(i, j int) bool {
		return resourceStatusKey(app.Status.Resources[i]) < resourceStatusKey(app.Status.Resources[j])
	})
	app.Status.SourceType = compareResult.appSourceType
	app.Status.SourceTypes = compareResult.appSourceTypes
	app.Status.ControllerNamespace = ctrl.namespace
	ts.AddCheckpoint("app_status_update_ms")
	patchDuration = ctrl.persistAppStatus(origApp, &app.Status)
	// This is partly a duplicate of patch_ms, but more descriptive and allows to have measurement for the next step.
	ts.AddCheckpoint("persist_app_status_ms")
	// Keep the post-delete finalizers in step with whether the target state has post-delete
	// hooks; never touch finalizers once deletion has started.
	if (compareResult.hasPostDeleteHooks != app.HasPostDeleteFinalizer() || compareResult.hasPostDeleteHooks != app.HasPostDeleteFinalizer("cleanup")) &&
		app.GetDeletionTimestamp() == nil {
		if compareResult.hasPostDeleteHooks {
			app.SetPostDeleteFinalizer()
			app.SetPostDeleteFinalizer("cleanup")
		} else {
			app.UnSetPostDeleteFinalizer()
			app.UnSetPostDeleteFinalizer("cleanup")
		}

		if err := ctrl.updateFinalizers(app); err != nil {
			logCtx.Errorf("Failed to update finalizers: %v", err)
		}
	}
	ts.AddCheckpoint("process_finalizers_ms")
	return
}

// processAppHydrateQueueItem pops one key off the app hydrate queue and hands a copy of the
// app to the hydrator. Returns false only when the queue is shutting down.
func (ctrl *ApplicationController) processAppHydrateQueueItem() (processNext bool) {
	appKey, shutdown := ctrl.appHydrateQueue.Get()
	if shutdown {
		processNext = false
		return
	}
	processNext = true
	defer func() {
		if r := recover(); r != nil {
			log.Errorf("Recovered from panic: %+v\n%s", r, debug.Stack())
		}
		ctrl.appHydrateQueue.Done(appKey)
	}()
	obj, exists, err := ctrl.appInformer.GetIndexer().GetByKey(appKey)
	if err != nil {
		log.Errorf("Failed to get application '%s' from informer index: %+v", appKey, err)
		return
	}
	if !exists {
		// This happens after app was deleted, but the work queue still had an entry for it.
		return
	}
	origApp, ok := obj.(*appv1.Application)
	if !ok {
		log.Warnf("Key '%s' in index is not an application", appKey)
		return
	}

	// Pass a copy so the hydrator cannot mutate the informer's cached object.
	ctrl.hydrator.ProcessAppHydrateQueueItem(origApp.DeepCopy())

	log.WithFields(applog.GetAppLogFields(origApp)).Debug("Successfully processed app hydrate queue item")
	return
}

// processHydrationQueueItem pops one hydration key off the hydration queue and hands it to
// the hydrator. Returns false only when the queue is shutting down.
func (ctrl *ApplicationController) processHydrationQueueItem() (processNext bool) {
	hydrationKey, shutdown := ctrl.hydrationQueue.Get()
	if shutdown {
		processNext = false
		return
	}
	processNext = true
	defer func() {
		if r := recover(); r != nil {
			log.Errorf("Recovered from panic: %+v\n%s", r, debug.Stack())
		}
		ctrl.hydrationQueue.Done(hydrationKey)
	}()

	logCtx := log.WithFields(log.Fields{
		"sourceRepoURL":        hydrationKey.SourceRepoURL,
		"sourceTargetRevision": hydrationKey.SourceTargetRevision,
		"destinationBranch":    hydrationKey.DestinationBranch,
	})

	logCtx.Debug("Processing hydration queue item")

	ctrl.hydrator.ProcessHydrationQueueItem(hydrationKey)

	logCtx.Debug("Successfully processed hydration queue item")
	return
}

// resourceStatusKey builds a stable sort key ("group/kind/namespace/name") for a resource status.
func resourceStatusKey(res appv1.ResourceStatus) string {
	return strings.Join([]string{res.Group, res.Kind, res.Namespace, res.Name}, "/")
}

// currentSourceEqualsSyncedSource reports whether the app's current spec source(s) match the
// source(s) recorded by the last comparison in status.sync.comparedTo.
func currentSourceEqualsSyncedSource(app *appv1.Application) bool {
	if app.Spec.HasMultipleSources() {
		return app.Spec.Sources.Equals(app.Status.Sync.ComparedTo.Sources)
	}
	source := app.Spec.GetSource()
	return source.Equals(&app.Status.Sync.ComparedTo.Source)
}

// needRefreshAppStatus answers if application status needs to be refreshed.
// Returns true if application never been compared, has changed or comparison result has expired.
// Additionally, it returns whether full refresh was requested or not.
// If full refresh is requested then target and live state should be reconciled, else only live state tree should be updated.
func (ctrl *ApplicationController) needRefreshAppStatus(app *appv1.Application, statusRefreshTimeout, statusHardRefreshTimeout time.Duration) (bool, appv1.RefreshType, CompareWith) {
	logCtx := log.WithFields(applog.GetAppLogFields(app))
	var reason string
	compareWith := CompareWithLatest
	refreshType := appv1.RefreshTypeNormal

	// A hard-refresh timeout of 0 disables hard expiry entirely.
	softExpired := app.Status.ReconciledAt == nil || app.Status.ReconciledAt.Add(statusRefreshTimeout).Before(time.Now().UTC())
	hardExpired := (app.Status.ReconciledAt == nil || app.Status.ReconciledAt.Add(statusHardRefreshTimeout).Before(time.Now().UTC())) && statusHardRefreshTimeout.Seconds() != 0

	if requestedType, ok := app.IsRefreshRequested(); ok {
		compareWith = CompareWithLatestForceResolve
		// user requested app refresh.
		refreshType = requestedType
		reason = fmt.Sprintf("%s refresh requested", refreshType)
	} else {
		if !currentSourceEqualsSyncedSource(app) {
			reason = "spec.source differs"
			compareWith = CompareWithLatestForceResolve
			if app.Spec.HasMultipleSources() {
				reason = "at least one of the spec.sources differs"
			}
		} else if hardExpired || softExpired {
			// The commented line below mysteriously crashes if app.Status.ReconciledAt is nil
			// reason = fmt.Sprintf("comparison expired. reconciledAt: %v, expiry: %v", app.Status.ReconciledAt, statusRefreshTimeout)
			// TODO: find existing Golang bug or create a new one
			reconciledAtStr := "never"
			if app.Status.ReconciledAt != nil {
				reconciledAtStr = app.Status.ReconciledAt.String()
			}
			reason = fmt.Sprintf("comparison expired, requesting refresh. reconciledAt: %v, expiry: %v", reconciledAtStr, statusRefreshTimeout)
			if hardExpired {
				reason = fmt.Sprintf("comparison expired, requesting hard refresh. reconciledAt: %v, expiry: %v", reconciledAtStr, statusHardRefreshTimeout)
				refreshType = appv1.RefreshTypeHard
			}
		} else if !reflect.DeepEqual(app.Spec.Destination, app.Status.Sync.ComparedTo.Destination) {
			reason = "spec.destination differs"
		} else if app.HasChangedManagedNamespaceMetadata() {
			reason = "spec.syncPolicy.managedNamespaceMetadata differs"
		} else if !app.Spec.IgnoreDifferences.Equals(app.Status.Sync.ComparedTo.IgnoreDifferences) {
			reason = "spec.ignoreDifferences differs"
		} else if requested, level := ctrl.isRefreshRequested(app.QualifiedName()); requested {
			compareWith = level
			reason = "controller refresh requested"
		}
	}

	if reason != "" {
		logCtx.Infof("Refreshing app status (%s), level (%d)", reason, compareWith)
		return true, refreshType, compareWith
	}
	return false, refreshType, compareWith
}

// refreshAppConditions validates the app against its project and its permissions, records any
// resulting error conditions on the app's status, and returns the project together with a flag
// indicating whether any error conditions were found.
func (ctrl *ApplicationController) refreshAppConditions(app *appv1.Application) (*appv1.AppProject, bool) {
	errorConditions := make([]appv1.ApplicationCondition, 0)
	proj, err := ctrl.getAppProj(app)
	if err != nil {
		errorConditions = append(errorConditions, ctrl.projectErrorToCondition(err, app))
	} else {
		specConditions, err := argo.ValidatePermissions(context.Background(), &app.Spec, proj, ctrl.db)
		if err != nil {
			errorConditions = append(errorConditions, appv1.ApplicationCondition{
				Type:    appv1.ApplicationConditionUnknownError,
				Message: err.Error(),
			})
		} else {
			errorConditions = append(errorConditions, specConditions...)
		}
	}
	app.Status.SetConditions(errorConditions, map[appv1.ApplicationConditionType]bool{
		appv1.ApplicationConditionInvalidSpecError: true,
		appv1.ApplicationConditionUnknownError:     true,
	})
	return proj, len(errorConditions) > 0
}

// normalizeApplication normalizes an application.spec and additionally persists updates if it changed
func (ctrl *ApplicationController) normalizeApplication(orig, app *appv1.Application) {
	app.Spec = *argo.NormalizeApplicationSpec(&app.Spec)
	logCtx := log.WithFields(applog.GetAppLogFields(app))

	patch, modified, err := diff.CreateTwoWayMergePatch(orig, app, appv1.Application{})

	if err != nil {
		logCtx.Errorf("error constructing app spec patch: %v", err)
	} else if modified {
		_, err := ctrl.PatchAppWithWriteBack(context.Background(), app.Name, app.Namespace, types.MergePatchType, patch, metav1.PatchOptions{})
		if err != nil {
			logCtx.Errorf("Error persisting normalized application spec: %v", err)
		} else {
			logCtx.Infof("Normalized app spec: %s", string(patch))
		}
	}
}

// createMergePatch returns a JSON merge patch from orig to newV and whether the patch is
// non-empty, i.e. whether the two values actually differ after JSON marshaling.
func createMergePatch(orig, newV any) ([]byte, bool, error) {
	origBytes, err := json.Marshal(orig)
	if err != nil {
		return nil, false, err
	}
	newBytes, err := json.Marshal(newV)
	if err != nil {
		return nil, false, err
	}
	patch, err := jsonpatch.CreateMergePatch(origBytes, newBytes)
	if err != nil {
		return nil, false, err
	}
	return patch, string(patch) != "{}", nil
}

// persistAppStatus persists updates to application status. If no changes were made, it is a no-op
func (ctrl *ApplicationController) persistAppStatus(orig *appv1.Application, newStatus *appv1.ApplicationStatus) (patchDuration time.Duration) {
	logCtx := log.WithFields(applog.GetAppLogFields(orig))
	if orig.Status.Sync.Status != newStatus.Sync.Status {
		message := fmt.Sprintf("Updated sync status: %s -> %s", orig.Status.Sync.Status, newStatus.Sync.Status)
		ctrl.logAppEvent(context.TODO(), orig, argo.EventInfo{Reason: argo.EventReasonResourceUpdated, Type: corev1.EventTypeNormal}, message)
	}
	if orig.Status.Health.Status != newStatus.Health.Status {
		// Update the last transition time to now. This should be the ONLY place in code where this is set, because it's
		// the only place that is reliably aware of the previous and updated health statuses.
		now := metav1.Now()
		newStatus.Health.LastTransitionTime = &now

		message := fmt.Sprintf("Updated health status: %s -> %s", orig.Status.Health.Status, newStatus.Health.Status)
		ctrl.logAppEvent(context.TODO(), orig, argo.EventInfo{Reason: argo.EventReasonResourceUpdated, Type: corev1.EventTypeNormal}, message)
	} else {
		// make sure the last transition time is the same and populated if the health is the same
		newStatus.Health.LastTransitionTime = orig.Status.Health.LastTransitionTime
	}
	// Drop the refresh/hydrate request annotations from the desired annotation set so the
	// merge patch below removes them once the refresh has been handled.
	var newAnnotations map[string]string
	if orig.GetAnnotations() != nil {
		newAnnotations = make(map[string]string)
		for k, v := range orig.GetAnnotations() {
			newAnnotations[k] = v
		}
		delete(newAnnotations, appv1.AnnotationKeyRefresh)
		delete(newAnnotations, appv1.AnnotationKeyHydrate)
	}
	patch, modified, err := createMergePatch(
		&appv1.Application{ObjectMeta: metav1.ObjectMeta{Annotations: orig.GetAnnotations()}, Status: orig.Status},
		&appv1.Application{ObjectMeta: metav1.ObjectMeta{Annotations: newAnnotations}, Status: *newStatus})
	if err != nil {
		logCtx.Errorf("Error constructing app status patch: %v", err)
		return
	}
	if !modified {
		logCtx.Infof("No status changes. Skipping patch")
		return
	}
	// calculate time for patch call
	start := time.Now()
	defer func() {
		patchDuration = time.Since(start)
	}()
	_, err = ctrl.PatchAppWithWriteBack(context.Background(), orig.Name, orig.Namespace, types.MergePatchType, patch, metav1.PatchOptions{})
	if err != nil {
		logCtx.Warnf("Error updating application: %v", err)
	} else {
		logCtx.Infof("Update successful")
	}
	return patchDuration
}

// autoSync will initiate a sync operation for an application configured with automated sync
func (ctrl *ApplicationController) autoSync(app *appv1.Application, syncStatus *appv1.SyncStatus, resources []appv1.ResourceStatus, shouldCompareRevisions bool) (*appv1.ApplicationCondition, time.Duration) {
	logCtx := log.WithFields(applog.GetAppLogFields(app))
	ts := stats.NewTimingStats()
	defer func() {
		for k, v := range ts.Timings() {
			logCtx = logCtx.WithField(k, v.Milliseconds())
		}
		logCtx = logCtx.WithField("time_ms", time.Since(ts.StartTime).Milliseconds())
		logCtx.Debug("Finished auto sync")
	}()
	if app.Spec.SyncPolicy == nil || !app.Spec.SyncPolicy.IsAutomatedSyncEnabled() {
		return nil, 0
	}

	if app.Operation != nil {
		logCtx.Infof("Skipping auto-sync: another operation is in progress")
		return nil, 0
	}
	if app.DeletionTimestamp != nil && !app.DeletionTimestamp.IsZero() {
		logCtx.Infof("Skipping auto-sync: deletion in progress")
		return nil, 0
	}

	// Only perform auto-sync if we detect OutOfSync status.
	// This is to prevent us from attempting
	// a sync when application is already in a Synced or Unknown state
	if syncStatus.Status != appv1.SyncStatusCodeOutOfSync {
		logCtx.Infof("Skipping auto-sync: application status is %s", syncStatus.Status)
		return nil, 0
	}

	// If the only out-of-sync resources are ones that merely need pruning, and automated
	// pruning is disabled, syncing would accomplish nothing.
	if !app.Spec.SyncPolicy.Automated.Prune {
		requirePruneOnly := true
		for _, r := range resources {
			if r.Status != appv1.SyncStatusCodeSynced && !r.RequiresPruning {
				requirePruneOnly = false
				break
			}
		}
		if requirePruneOnly {
			logCtx.Infof("Skipping auto-sync: need to prune extra resources only but automated prune is disabled")
			return nil, 0
		}
	}

	// For multi-source apps the single source/revision fields are not used.
	source := ptr.To(app.Spec.GetSource())
	desiredRevisions := []string{syncStatus.Revision}
	if app.Spec.HasMultipleSources() {
		source = nil
		desiredRevisions = syncStatus.Revisions
	}

	op := appv1.Operation{
		Sync: &appv1.SyncOperation{
			Source:      source,
			Revision:    syncStatus.Revision,
			Prune:       app.Spec.SyncPolicy.Automated.Prune,
			SyncOptions: app.Spec.SyncPolicy.SyncOptions,
			Sources:     app.Spec.Sources,
			Revisions:   syncStatus.Revisions,
		},
		InitiatedBy: appv1.OperationInitiator{Automated: true},
		Retry:       appv1.RetryStrategy{Limit: 5},
	}
	if app.Spec.SyncPolicy.Retry != nil {
		op.Retry = *app.Spec.SyncPolicy.Retry
	}

	// It is possible for manifests to remain OutOfSync even after a sync/kubectl apply (e.g.
	// auto-sync with pruning disabled). We need to ensure that we do not keep Syncing an
	// application in an infinite loop. To detect this, we only attempt the Sync if the revision
	// and parameter overrides are different from our most recent sync operation.
	alreadyAttempted, lastAttemptedRevisions, lastAttemptedPhase := alreadyAttemptedSync(app, desiredRevisions, shouldCompareRevisions)
	ts.AddCheckpoint("already_attempted_sync_ms")
	if alreadyAttempted {
		if !lastAttemptedPhase.Successful() {
			logCtx.Warnf("Skipping auto-sync: failed previous sync attempt to %s and will not retry for %s", lastAttemptedRevisions, desiredRevisions)
			message := fmt.Sprintf("Failed last sync attempt to %s: %s", lastAttemptedRevisions, app.Status.OperationState.Message)
			return &appv1.ApplicationCondition{Type: appv1.ApplicationConditionSyncError, Message: message}, 0
		}
		if !app.Spec.SyncPolicy.Automated.SelfHeal {
			logCtx.Infof("Skipping auto-sync: most recent sync already to %s", desiredRevisions)
			return nil, 0
		}
		// Self heal will trigger a new sync operation when the desired state changes and cause the application to
		// be OutOfSync when it was previously synced Successfully. This means SelfHeal should only ever be attempted
		// when the revisions have not changed, and where the previous sync to these revision was successful

		// Only carry SelfHealAttemptsCount to be increased when the selfHealBackoffCooldown has not elapsed yet
		if !ctrl.selfHealBackoffCooldownElapsed(app) {
			if app.Status.OperationState != nil && app.Status.OperationState.Operation.Sync != nil {
				op.Sync.SelfHealAttemptsCount = app.Status.OperationState.Operation.Sync.SelfHealAttemptsCount
			}
		}

		// Still inside the backoff window: requeue a refresh for when the backoff expires.
		if remainingTime := ctrl.selfHealRemainingBackoff(app, int(op.Sync.SelfHealAttemptsCount)); remainingTime > 0 {
			logCtx.Infof("Skipping auto-sync: already attempted sync to %s with timeout %v (retrying in %v)", lastAttemptedRevisions, ctrl.selfHealTimeout, remainingTime)
			ctrl.requestAppRefresh(app.QualifiedName(), CompareWithLatest.Pointer(), &remainingTime)
			return nil, 0
		}

		op.Sync.SelfHealAttemptsCount++
		// Self-heal syncs only the resources that are currently out of sync.
		for _, resource := range resources {
			if resource.Status != appv1.SyncStatusCodeSynced {
				op.Sync.Resources = append(op.Sync.Resources, appv1.SyncOperationResource{
					Kind:  resource.Kind,
					Group: resource.Group,
					Name:  resource.Name,
				})
			}
		}
	}
	ts.AddCheckpoint("already_attempted_check_ms")

	// Safety valve: refuse an automated sync that would prune every managed resource,
	// unless the user explicitly allowed empty apps.
	if app.Spec.SyncPolicy.Automated.Prune && !app.Spec.SyncPolicy.Automated.AllowEmpty {
		bAllNeedPrune := true
		for _, r := range resources {
			if !r.RequiresPruning {
				bAllNeedPrune = false
			}
		}
		if bAllNeedPrune {
			message := fmt.Sprintf("Skipping sync attempt to %s: auto-sync will wipe out all resources", desiredRevisions)
			logCtx.Warn(message)
			return &appv1.ApplicationCondition{Type: appv1.ApplicationConditionSyncError, Message: message}, 0
		}
	}

	appIf := ctrl.applicationClientset.ArgoprojV1alpha1().Applications(app.Namespace)
	ts.AddCheckpoint("get_applications_ms")
	start := time.Now()
	updatedApp, err := argo.SetAppOperation(appIf, app.Name, &op)
	ts.AddCheckpoint("set_app_operation_ms")
	setOpTime := time.Since(start)
	if err != nil {
		if stderrors.Is(err, argo.ErrAnotherOperationInProgress) {
			// skipping auto-sync because another operation is in progress and was not noticed due to stale data in informer
			// it is safe to skip auto-sync because it is already running
			logCtx.Warnf("Failed to initiate auto-sync to %s: %v", desiredRevisions, err)
			return nil, 0
		}

		logCtx.Errorf("Failed to initiate auto-sync to %s: %v", desiredRevisions, err)
		return &appv1.ApplicationCondition{Type: appv1.ApplicationConditionSyncError, Message: err.Error()}, setOpTime
	}
	ctrl.writeBackToInformer(updatedApp)
	ts.AddCheckpoint("write_back_to_informer_ms")

	message := fmt.Sprintf("Initiated automated sync to %s", desiredRevisions)
	ctrl.logAppEvent(context.TODO(), app, argo.EventInfo{Reason: argo.EventReasonOperationStarted, Type: corev1.EventTypeNormal}, message)
	logCtx.Info(message)
	return nil, setOpTime
}

// alreadyAttemptedSync returns whether the most recently synced revision(s) exactly match the given desiredRevisions
// and for the same application source. If the revision(s) have changed or the Application source configuration has been updated,
// it will return false, indicating that a new sync should be attempted.
// When newRevisionHasChanges is false, due to commits not having direct changes on the application, it will not compare the revision(s), but only the sources.
// It also returns the last synced revisions if any, and the result of that last sync operation.
func alreadyAttemptedSync(app *appv1.Application, desiredRevisions []string, newRevisionHasChanges bool) (bool, []string, synccommon.OperationPhase) {
	if app.Status.OperationState == nil {
		// The operation state may be removed when new operations are triggered
		return false, []string{}, ""
	}
	if app.Status.OperationState.SyncResult == nil {
		// If the sync has completed without result, it is very likely that an error happened
		// We don't want to resync with auto-sync indefinitely.
		// We should have retried the configured amount of time already
		// In this case, a manual action to restore the app may be required
		log.WithFields(applog.GetAppLogFields(app)).Warn("Already attempted sync: sync does not have any results")
		return app.Status.OperationState.Phase.Completed(), []string{}, app.Status.OperationState.Phase
	}

	if newRevisionHasChanges {
		log.WithFields(applog.GetAppLogFields(app)).Infof("Already attempted sync: comparing synced revisions to %s", desiredRevisions)
		if app.Spec.HasMultipleSources() {
			if !reflect.DeepEqual(app.Status.OperationState.SyncResult.Revisions, desiredRevisions) {
				return false, app.Status.OperationState.SyncResult.Revisions, app.Status.OperationState.Phase
			}
		} else {
			if len(desiredRevisions) != 1 || app.Status.OperationState.SyncResult.Revision != desiredRevisions[0] {
				return false, []string{app.Status.OperationState.SyncResult.Revision}, app.Status.OperationState.Phase
			}
		}
	} else {
		log.WithFields(applog.GetAppLogFields(app)).Debugf("Already attempted sync: revisions %s have no changes", desiredRevisions)
	}

	// Revisions match (or were not compared); the sync counts as already attempted only if
	// the source configuration is also unchanged since the last sync.
	log.WithFields(applog.GetAppLogFields(app)).Debug("Already attempted sync: comparing sources")
	if app.Spec.HasMultipleSources() {
		return reflect.DeepEqual(app.Spec.Sources, app.Status.OperationState.SyncResult.Sources), app.Status.OperationState.SyncResult.Revisions, app.Status.OperationState.Phase
	}
	return reflect.DeepEqual(app.Spec.GetSource(), app.Status.OperationState.SyncResult.Source), []string{app.Status.OperationState.SyncResult.Revision}, app.Status.OperationState.Phase
}

// selfHealRemainingBackoff returns how long to wait before the next self-heal attempt may
// run; a result <= 0 means self-heal can proceed immediately. Without a configured backoff
// the fixed selfHealTimeout applies; otherwise an exponential backoff is advanced
// selfHealAttemptsCount steps. Time already elapsed since the last operation finished is
// subtracted from the computed delay.
func (ctrl *ApplicationController) selfHealRemainingBackoff(app *appv1.Application, selfHealAttemptsCount int) time.Duration {
	if app.Status.OperationState == nil {
		return time.Duration(0)
	}

	var timeSinceOperation *time.Duration
	if app.Status.OperationState.FinishedAt != nil {
		timeSinceOperation = ptr.To(time.Since(app.Status.OperationState.FinishedAt.Time))
	}

	var retryAfter time.Duration
	if ctrl.selfHealBackoff == nil {
		if timeSinceOperation == nil {
			retryAfter = ctrl.selfHealTimeout
		} else {
			retryAfter = ctrl.selfHealTimeout - *timeSinceOperation
		}
	} else {
		backOff := *ctrl.selfHealBackoff
		backOff.Steps = selfHealAttemptsCount
		var delay time.Duration
		steps := backOff.Steps
		// Step() advances the backoff's internal state, so call it once per prior attempt
		// to obtain the delay for the current attempt.
		for i := 0; i < steps; i++ {
			delay = backOff.Step()
		}
		if timeSinceOperation == nil {
			retryAfter = delay
		} else {
			retryAfter = delay - *timeSinceOperation
		}
	}
	return retryAfter
}

// selfHealBackoffCooldownElapsed returns true when the last successful sync has occurred since longer
// than then self heal cooldown. This means that the application has been in sync for long enough to
// reset the self healing backoff to its initial state
func (ctrl *ApplicationController) selfHealBackoffCooldownElapsed(app *appv1.Application) bool {
	if app.Status.OperationState == nil || app.Status.OperationState.FinishedAt == nil {
		// Something is in progress, or about to be. In that case, selfHeal attempt should be zero anyway
		return true
	}

	timeSinceLastOperation := time.Since(app.Status.OperationState.FinishedAt.Time)
	return timeSinceLastOperation >= ctrl.selfHealBackoffCooldown && app.Status.OperationState.Phase.Successful()
}

// isAppNamespaceAllowed returns whether the application is allowed in the
// namespace it's residing in.
func (ctrl *ApplicationController) isAppNamespaceAllowed(app *appv1.Application) bool {
	return app.Namespace == ctrl.namespace || glob.MatchStringInList(ctrl.applicationNamespaces, app.Namespace, glob.REGEXP)
}

// canProcessApp returns whether obj is an Application this controller instance
// is responsible for: it must live in a watched namespace, must not carry the
// skip-reconcile annotation set to true, and its destination cluster must be
// managed by this controller's shard.
func (ctrl *ApplicationController) canProcessApp(obj any) bool {
	app, ok := obj.(*appv1.Application)
	if !ok {
		return false
	}

	// Only process given app if it exists in a watched namespace, or in the
	// control plane's namespace.
	if !ctrl.isAppNamespaceAllowed(app) {
		return false
	}

	if annotations := app.GetAnnotations(); annotations != nil {
		if skipVal, ok := annotations[common.AnnotationKeyAppSkipReconcile]; ok {
			logCtx := log.WithFields(applog.GetAppLogFields(app))
			if skipReconcile, err := strconv.ParseBool(skipVal); err == nil {
				if skipReconcile {
					logCtx.Debugf("Skipping Application reconcile based on annotation %s", common.AnnotationKeyAppSkipReconcile)
					return false
				}
			} else {
				// An unparsable annotation value is treated as "do not skip".
				logCtx.Debugf("Unable to determine if Application should skip reconcile based on annotation %s: %v", common.AnnotationKeyAppSkipReconcile, err)
			}
		}
	}

	destCluster, err := argo.GetDestinationCluster(context.Background(), app.Spec.Destination, ctrl.db)
	if err != nil {
		// Destination could not be resolved; defer the decision to the
		// sharding logic with a nil cluster.
		return ctrl.clusterSharding.IsManagedCluster(nil)
	}
	return ctrl.clusterSharding.IsManagedCluster(destCluster)
}

// newApplicationInformerAndLister builds the shared informer and lister used to
// watch Applications in all configured namespaces, wiring up indexers (the
// namespace index and the orphaned-resources index) and event handlers that
// feed the refresh/operation/hydrate work queues and keep cluster sharding
// assignments up to date.
func (ctrl *ApplicationController) newApplicationInformerAndLister() (cache.SharedIndexInformer, applisters.ApplicationLister) {
	watchNamespace := ctrl.namespace
	// If we have at least one additional namespace configured, we need to
	// watch on them all.
	if len(ctrl.applicationNamespaces) > 0 {
		watchNamespace = ""
	}
	// Resync at the shorter of the soft and hard refresh timeouts (a zero hard
	// timeout disables that override).
	refreshTimeout := ctrl.statusRefreshTimeout
	if ctrl.statusHardRefreshTimeout.Seconds() != 0 && (ctrl.statusHardRefreshTimeout < ctrl.statusRefreshTimeout) {
		refreshTimeout = ctrl.statusHardRefreshTimeout
	}
	informer := cache.NewSharedIndexInformer(
		&cache.ListWatch{
			ListFunc: func(options metav1.ListOptions) (apiruntime.Object, error) {
				// We are only interested in apps that exist in namespaces the
				// user wants to be enabled.
				appList, err := ctrl.applicationClientset.ArgoprojV1alpha1().Applications(watchNamespace).List(context.TODO(), options)
				if err != nil {
					return nil, err
				}
				newItems := []appv1.Application{}
				for _, app := range appList.Items {
					if ctrl.isAppNamespaceAllowed(&app) {
						newItems = append(newItems, app)
					}
				}
				appList.Items = newItems
				return appList, nil
			},
			WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) {
				return ctrl.applicationClientset.ArgoprojV1alpha1().Applications(watchNamespace).Watch(context.TODO(), options)
			},
		},
		&appv1.Application{},
		refreshTimeout,
		cache.Indexers{
			cache.NamespaceIndex: func(obj any) ([]string, error) {
				app, ok := obj.(*appv1.Application)
				if ok {
					// We only generally work with applications that are in one
					// the allowed namespaces.
					if ctrl.isAppNamespaceAllowed(app) {
						// If the application is not allowed to use the project,
						// log an error.
						if _, err := ctrl.getAppProj(app); err != nil {
							ctrl.setAppCondition(app, ctrl.projectErrorToCondition(err, app))
						} else if _, err = argo.GetDestinationCluster(context.Background(), app.Spec.Destination, ctrl.db); err != nil {
							ctrl.setAppCondition(app, appv1.ApplicationCondition{Type: appv1.ApplicationConditionInvalidSpecError, Message: err.Error()})
						}
					}
				}

				return cache.MetaNamespaceIndexFunc(obj)
			},
			// orphanedIndex maps apps whose project monitors orphaned
			// resources to their destination namespace; errors are swallowed
			// so a missing project simply excludes the app from the index.
			orphanedIndex: func(obj any) (i []string, e error) {
				app, ok := obj.(*appv1.Application)
				if !ok {
					return nil, nil
				}

				if !ctrl.isAppNamespaceAllowed(app) {
					return nil, nil
				}

				proj, err := ctrl.getAppProj(app)
				if err != nil {
					return nil, nil
				}
				if proj.Spec.OrphanedResources != nil {
					return []string{app.Spec.Destination.Namespace}, nil
				}
				return nil, nil
			},
		},
	)
	lister := applisters.NewApplicationLister(informer.GetIndexer())
	_, err := informer.AddEventHandler(
		cache.ResourceEventHandlerFuncs{
			AddFunc: func(obj any) {
				if !ctrl.canProcessApp(obj) {
					return
				}
				key, err := cache.MetaNamespaceKeyFunc(obj)
				if err == nil {
					ctrl.appRefreshQueue.AddRateLimited(key)
				}
				newApp, newOK := obj.(*appv1.Application)
				if err == nil && newOK {
					ctrl.clusterSharding.AddApp(newApp)
				}
			},
			UpdateFunc: func(old, new any) {
				if !ctrl.canProcessApp(new) {
					return
				}

				key, err := cache.MetaNamespaceKeyFunc(new)
				if err != nil {
					return
				}

				var compareWith *CompareWith
				var delay *time.Duration

				oldApp, oldOK := old.(*appv1.Application)
				newApp, newOK := new.(*appv1.Application)
				if oldOK && newOK {
					if automatedSyncEnabled(oldApp, newApp) {
						log.WithFields(applog.GetAppLogFields(newApp)).Info("Enabled automated sync")
						compareWith = CompareWithLatest.Pointer()
					}
					if ctrl.statusRefreshJitter != 0 && oldApp.ResourceVersion == newApp.ResourceVersion {
						// Handler is refreshing the apps, add a random jitter to spread the load and avoid spikes
						jitter := time.Duration(float64(ctrl.statusRefreshJitter) * rand.Float64())
						delay = &jitter
					}
				}

				ctrl.requestAppRefresh(newApp.QualifiedName(), compareWith, delay)
				// NOTE(review): the operation queue is only fed when the type
				// assertion failed or a non-zero jitter delay was applied —
				// confirm this condition is intentional.
				if !newOK || (delay != nil && *delay != time.Duration(0)) {
					ctrl.appOperationQueue.AddRateLimited(key)
				}
				if ctrl.hydrator != nil {
					ctrl.appHydrateQueue.AddRateLimited(newApp.QualifiedName())
				}
				ctrl.clusterSharding.UpdateApp(newApp)
			},
			DeleteFunc: func(obj any) {
				if !ctrl.canProcessApp(obj) {
					return
				}
				// IndexerInformer uses a delta queue, therefore for deletes we have to use this
				// key function.
				key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(obj)
				if err == nil {
					// for deletes, we immediately add to the refresh queue
					ctrl.appRefreshQueue.Add(key)
				}
				delApp, delOK := obj.(*appv1.Application)
				if err == nil && delOK {
					ctrl.clusterSharding.DeleteApp(delApp)
				}
			},
		},
	)
	if err != nil {
		return nil, nil
	}
	return informer, lister
}

// projectErrorToCondition converts an error returned while resolving an app's
// project into an Application condition: a not-found error becomes an
// InvalidSpecError referencing the project name, anything else an UnknownError.
func (ctrl *ApplicationController) projectErrorToCondition(err error, app *appv1.Application) appv1.ApplicationCondition {
	var condition appv1.ApplicationCondition
	if apierrors.IsNotFound(err) {
		condition = appv1.ApplicationCondition{
			Type:    appv1.ApplicationConditionInvalidSpecError,
			Message: fmt.Sprintf("Application referencing project %s which does not exist", app.Spec.Project),
		}
	} else {
		condition = appv1.ApplicationCondition{Type: appv1.ApplicationConditionUnknownError, Message: err.Error()}
	}
	return condition
}

// RegisterClusterSecretUpdater starts the background cluster-info updater in a
// goroutine; it runs until ctx is cancelled.
func (ctrl *ApplicationController) RegisterClusterSecretUpdater(ctx context.Context) {
	updater :=
NewClusterInfoUpdater(ctrl.stateCache, ctrl.db, ctrl.appLister.Applications(""), ctrl.cache, ctrl.clusterSharding.IsManagedCluster, ctrl.getAppProj, ctrl.namespace)
	go updater.Run(ctx)
}

// isOperationInProgress reports whether the app has an operation that has
// started but not yet reached a terminal phase.
func isOperationInProgress(app *appv1.Application) bool {
	return app.Status.OperationState != nil && !app.Status.OperationState.Phase.Completed()
}

// automatedSyncEnabled tests if an app went from auto-sync disabled to enabled.
// if it was toggled to be enabled, the informer handler will force a refresh
func automatedSyncEnabled(oldApp *appv1.Application, newApp *appv1.Application) bool {
	oldEnabled := false
	oldSelfHealEnabled := false
	if oldApp.Spec.SyncPolicy != nil && oldApp.Spec.SyncPolicy.IsAutomatedSyncEnabled() {
		oldEnabled = true
		oldSelfHealEnabled = oldApp.Spec.SyncPolicy.Automated.SelfHeal
	}

	newEnabled := false
	newSelfHealEnabled := false
	if newApp.Spec.SyncPolicy != nil && newApp.Spec.SyncPolicy.IsAutomatedSyncEnabled() {
		newEnabled = true
		newSelfHealEnabled = newApp.Spec.SyncPolicy.Automated.SelfHeal
	}
	// Automated sync toggled from disabled to enabled.
	if !oldEnabled && newEnabled {
		return true
	}
	// Self-heal toggled from disabled to enabled.
	if !oldSelfHealEnabled && newSelfHealEnabled {
		return true
	}
	// nothing changed
	return false
}

// toAppKey returns the application key from a given appName, that is, it will
// replace underscores with forward-slashes to become a <namespace>/<name>
// format. If the appName is an unqualified name (such as, "app"), it will use
// the controller's namespace in the key.
2578 func (ctrl *ApplicationController) toAppKey(appName string) string { 2579 if !strings.Contains(appName, "_") && !strings.Contains(appName, "/") { 2580 return ctrl.namespace + "/" + appName 2581 } else if strings.Contains(appName, "/") { 2582 return appName 2583 } 2584 return strings.ReplaceAll(appName, "_", "/") 2585 } 2586 2587 func (ctrl *ApplicationController) toAppQualifiedName(appName, appNamespace string) string { 2588 return fmt.Sprintf("%s/%s", appNamespace, appName) 2589 } 2590 2591 func (ctrl *ApplicationController) getAppList(options metav1.ListOptions) (*appv1.ApplicationList, error) { 2592 watchNamespace := ctrl.namespace 2593 // If we have at least one additional namespace configured, we need to 2594 // watch on them all. 2595 if len(ctrl.applicationNamespaces) > 0 { 2596 watchNamespace = "" 2597 } 2598 2599 appList, err := ctrl.applicationClientset.ArgoprojV1alpha1().Applications(watchNamespace).List(context.TODO(), options) 2600 if err != nil { 2601 return nil, err 2602 } 2603 newItems := []appv1.Application{} 2604 for _, app := range appList.Items { 2605 if ctrl.isAppNamespaceAllowed(&app) { 2606 newItems = append(newItems, app) 2607 } 2608 } 2609 appList.Items = newItems 2610 return appList, nil 2611 } 2612 2613 func (ctrl *ApplicationController) logAppEvent(ctx context.Context, a *appv1.Application, eventInfo argo.EventInfo, message string) { 2614 eventLabels := argo.GetAppEventLabels(ctx, a, applisters.NewAppProjectLister(ctrl.projInformer.GetIndexer()), ctrl.namespace, ctrl.settingsMgr, ctrl.db) 2615 ctrl.auditLogger.LogAppEvent(a, eventInfo, message, "", eventLabels) 2616 } 2617 2618 type ClusterFilterFunction func(c *appv1.Cluster, distributionFunction sharding.DistributionFunction) bool