github.com/argoproj/argo-cd/v3@v3.2.1/cmd/argocd-application-controller/commands/argocd_application_controller.go (about) 1 package commands 2 3 import ( 4 "context" 5 "fmt" 6 "math" 7 "os" 8 "os/signal" 9 "runtime/debug" 10 "syscall" 11 "time" 12 13 "github.com/argoproj/pkg/v2/stats" 14 "github.com/redis/go-redis/v9" 15 log "github.com/sirupsen/logrus" 16 "github.com/spf13/cobra" 17 "k8s.io/apimachinery/pkg/util/wait" 18 "k8s.io/client-go/kubernetes" 19 "k8s.io/client-go/tools/clientcmd" 20 21 cmdutil "github.com/argoproj/argo-cd/v3/cmd/util" 22 commitclient "github.com/argoproj/argo-cd/v3/commitserver/apiclient" 23 "github.com/argoproj/argo-cd/v3/common" 24 "github.com/argoproj/argo-cd/v3/controller" 25 "github.com/argoproj/argo-cd/v3/controller/sharding" 26 "github.com/argoproj/argo-cd/v3/pkg/apis/application/v1alpha1" 27 appclientset "github.com/argoproj/argo-cd/v3/pkg/client/clientset/versioned" 28 "github.com/argoproj/argo-cd/v3/pkg/ratelimiter" 29 "github.com/argoproj/argo-cd/v3/reposerver/apiclient" 30 "github.com/argoproj/argo-cd/v3/util/argo" 31 "github.com/argoproj/argo-cd/v3/util/argo/normalizers" 32 cacheutil "github.com/argoproj/argo-cd/v3/util/cache" 33 appstatecache "github.com/argoproj/argo-cd/v3/util/cache/appstate" 34 "github.com/argoproj/argo-cd/v3/util/cli" 35 "github.com/argoproj/argo-cd/v3/util/env" 36 "github.com/argoproj/argo-cd/v3/util/errors" 37 kubeutil "github.com/argoproj/argo-cd/v3/util/kube" 38 "github.com/argoproj/argo-cd/v3/util/settings" 39 "github.com/argoproj/argo-cd/v3/util/tls" 40 "github.com/argoproj/argo-cd/v3/util/trace" 41 ) 42 43 const ( 44 // CLIName is the name of the CLI 45 cliName = common.ApplicationController 46 // Default time in seconds for application resync period 47 defaultAppResyncPeriod = 120 48 // Default time in seconds for application resync period jitter 49 defaultAppResyncPeriodJitter = 60 50 // Default time in seconds for application hard resync period 51 defaultAppHardResyncPeriod = 0 52 // Default time in seconds for ignoring consecutive errors when comminicating with repo-server 53 defaultRepoErrorGracePeriod = defaultAppResyncPeriod + defaultAppResyncPeriodJitter 54 ) 55 56 func NewCommand() *cobra.Command { 57 var ( 58 workqueueRateLimit ratelimiter.AppControllerRateLimiterConfig 59 clientConfig clientcmd.ClientConfig 60 appResyncPeriod int64 61 appHardResyncPeriod int64 62 appResyncJitter int64 63 repoErrorGracePeriod int64 64 repoServerAddress string 65 repoServerTimeoutSeconds int 66 commitServerAddress string 67 selfHealTimeoutSeconds int 68 selfHealBackoffTimeoutSeconds int 69 selfHealBackoffFactor int 70 selfHealBackoffCapSeconds int 71 selfHealBackoffCooldownSeconds int 72 syncTimeout int 73 statusProcessors int 74 operationProcessors int 75 glogLevel int 76 metricsPort int 77 metricsCacheExpiration time.Duration 78 metricsAplicationLabels []string 79 metricsAplicationConditions []string 80 metricsClusterLabels []string 81 kubectlParallelismLimit int64 82 cacheSource func() (*appstatecache.Cache, error) 83 redisClient *redis.Client 84 repoServerPlaintext bool 85 repoServerStrictTLS bool 86 otlpAddress string 87 otlpInsecure bool 88 otlpHeaders map[string]string 89 otlpAttrs []string 90 applicationNamespaces []string 91 persistResourceHealth bool 92 shardingAlgorithm string 93 enableDynamicClusterDistribution bool 94 serverSideDiff bool 95 ignoreNormalizerOpts normalizers.IgnoreNormalizerOpts 96 97 // argocd k8s event logging flag 98 enableK8sEvent []string 99 hydratorEnabled bool 100 ) 101 command := cobra.Command{ 102 Use: cliName, 103 Short: "Run ArgoCD Application Controller", 104 Long: "ArgoCD application controller is a Kubernetes controller that continuously monitors running applications and compares the current, live state against the desired target state (as specified in the repo). This command runs Application Controller in the foreground. It can be configured by following options.", 105 DisableAutoGenTag: true, 106 RunE: func(c *cobra.Command, _ []string) error { 107 ctx, cancel := context.WithCancel(c.Context()) 108 defer cancel() 109 110 vers := common.GetVersion() 111 namespace, _, err := clientConfig.Namespace() 112 errors.CheckError(err) 113 vers.LogStartupInfo( 114 "ArgoCD Application Controller", 115 map[string]any{ 116 "namespace": namespace, 117 }, 118 ) 119 120 cli.SetLogFormat(cmdutil.LogFormat) 121 cli.SetLogLevel(cmdutil.LogLevel) 122 cli.SetGLogLevel(glogLevel) 123 124 // Recover from panic and log the error using the configured logger instead of the default. 125 defer func() { 126 if r := recover(); r != nil { 127 log.WithField("trace", string(debug.Stack())).Fatal("Recovered from panic: ", r) 128 } 129 }() 130 131 config, err := clientConfig.ClientConfig() 132 errors.CheckError(err) 133 errors.CheckError(v1alpha1.SetK8SConfigDefaults(config)) 134 config.UserAgent = fmt.Sprintf("%s/%s (%s)", common.DefaultApplicationControllerName, vers.Version, vers.Platform) 135 136 kubeClient := kubernetes.NewForConfigOrDie(config) 137 appClient := appclientset.NewForConfigOrDie(config) 138 139 hardResyncDuration := time.Duration(appHardResyncPeriod) * time.Second 140 141 var resyncDuration time.Duration 142 if appResyncPeriod == 0 { 143 // Re-sync should be disabled if period is 0. Set duration to a very long duration 144 resyncDuration = time.Hour * 24 * 365 * 100 145 } else { 146 resyncDuration = time.Duration(appResyncPeriod) * time.Second 147 } 148 149 tlsConfig := apiclient.TLSConfiguration{ 150 DisableTLS: repoServerPlaintext, 151 StrictValidation: repoServerStrictTLS, 152 } 153 154 // Load CA information to use for validating connections to the 155 // repository server, if strict TLS validation was requested. 156 if !repoServerPlaintext && repoServerStrictTLS { 157 pool, err := tls.LoadX509CertPool( 158 env.StringFromEnv(common.EnvAppConfigPath, common.DefaultAppConfigPath)+"/controller/tls/tls.crt", 159 env.StringFromEnv(common.EnvAppConfigPath, common.DefaultAppConfigPath)+"/controller/tls/ca.crt", 160 ) 161 if err != nil { 162 log.Fatalf("%v", err) 163 } 164 tlsConfig.Certificates = pool 165 } 166 167 repoClientset := apiclient.NewRepoServerClientset(repoServerAddress, repoServerTimeoutSeconds, tlsConfig) 168 169 commitClientset := commitclient.NewCommitServerClientset(commitServerAddress) 170 171 cache, err := cacheSource() 172 errors.CheckError(err) 173 cache.Cache.SetClient(cacheutil.NewTwoLevelClient(cache.Cache.GetClient(), 10*time.Minute)) 174 175 var appController *controller.ApplicationController 176 177 settingsMgr := settings.NewSettingsManager(ctx, kubeClient, namespace, settings.WithRepoOrClusterChangedHandler(func() { 178 appController.InvalidateProjectsCache() 179 })) 180 kubectl := kubeutil.NewKubectl() 181 clusterSharding, err := sharding.GetClusterSharding(kubeClient, settingsMgr, shardingAlgorithm, enableDynamicClusterDistribution) 182 errors.CheckError(err) 183 var selfHealBackoff *wait.Backoff 184 if selfHealBackoffTimeoutSeconds != 0 { 185 selfHealBackoff = &wait.Backoff{ 186 Duration: time.Duration(selfHealBackoffTimeoutSeconds) * time.Second, 187 Factor: float64(selfHealBackoffFactor), 188 Cap: time.Duration(selfHealBackoffCapSeconds) * time.Second, 189 } 190 } 191 appController, err = controller.NewApplicationController( 192 namespace, 193 settingsMgr, 194 kubeClient, 195 appClient, 196 repoClientset, 197 commitClientset, 198 cache, 199 kubectl, 200 resyncDuration, 201 hardResyncDuration, 202 time.Duration(appResyncJitter)*time.Second, 203 time.Duration(selfHealTimeoutSeconds)*time.Second, 204 selfHealBackoff, 205 time.Duration(selfHealBackoffCooldownSeconds)*time.Second, 206 time.Duration(syncTimeout)*time.Second, 207 time.Duration(repoErrorGracePeriod)*time.Second, 208 metricsPort, 209 metricsCacheExpiration, 210 metricsAplicationLabels, 211 metricsAplicationConditions, 212 metricsClusterLabels, 213 kubectlParallelismLimit, 214 persistResourceHealth, 215 clusterSharding, 216 applicationNamespaces, 217 &workqueueRateLimit, 218 serverSideDiff, 219 enableDynamicClusterDistribution, 220 ignoreNormalizerOpts, 221 enableK8sEvent, 222 hydratorEnabled, 223 ) 224 errors.CheckError(err) 225 cacheutil.CollectMetrics(redisClient, appController.GetMetricsServer(), nil) 226 227 stats.RegisterStackDumper() 228 stats.StartStatsTicker(10 * time.Minute) 229 stats.RegisterHeapDumper("memprofile") 230 231 if otlpAddress != "" { 232 closeTracer, err := trace.InitTracer(ctx, "argocd-controller", otlpAddress, otlpInsecure, otlpHeaders, otlpAttrs) 233 if err != nil { 234 log.Fatalf("failed to initialize tracing: %v", err) 235 } 236 defer closeTracer() 237 } 238 239 // Graceful shutdown code 240 sigCh := make(chan os.Signal, 1) 241 signal.Notify(sigCh, os.Interrupt, syscall.SIGTERM) 242 go func() { 243 s := <-sigCh 244 log.Printf("got signal %v, attempting graceful shutdown", s) 245 cancel() 246 }() 247 248 go appController.Run(ctx, statusProcessors, operationProcessors) 249 250 <-ctx.Done() 251 252 log.Println("clean shutdown") 253 254 return nil 255 }, 256 } 257 258 clientConfig = cli.AddKubectlFlagsToCmd(&command) 259 command.Flags().Int64Var(&appResyncPeriod, "app-resync", int64(env.ParseDurationFromEnv("ARGOCD_RECONCILIATION_TIMEOUT", defaultAppResyncPeriod*time.Second, 0, math.MaxInt64).Seconds()), "Time period in seconds for application resync.") 260 command.Flags().Int64Var(&appHardResyncPeriod, "app-hard-resync", int64(env.ParseDurationFromEnv("ARGOCD_HARD_RECONCILIATION_TIMEOUT", defaultAppHardResyncPeriod*time.Second, 0, math.MaxInt64).Seconds()), "Time period in seconds for application hard resync.") 261 command.Flags().Int64Var(&appResyncJitter, "app-resync-jitter", int64(env.ParseDurationFromEnv("ARGOCD_RECONCILIATION_JITTER", defaultAppResyncPeriodJitter*time.Second, 0, math.MaxInt64).Seconds()), "Maximum time period in seconds to add as a delay jitter for application resync.") 262 command.Flags().Int64Var(&repoErrorGracePeriod, "repo-error-grace-period-seconds", int64(env.ParseDurationFromEnv("ARGOCD_REPO_ERROR_GRACE_PERIOD_SECONDS", defaultRepoErrorGracePeriod*time.Second, 0, math.MaxInt64).Seconds()), "Grace period in seconds for ignoring consecutive errors while communicating with repo server.") 263 command.Flags().StringVar(&repoServerAddress, "repo-server", env.StringFromEnv("ARGOCD_APPLICATION_CONTROLLER_REPO_SERVER", common.DefaultRepoServerAddr), "Repo server address.") 264 command.Flags().IntVar(&repoServerTimeoutSeconds, "repo-server-timeout-seconds", env.ParseNumFromEnv("ARGOCD_APPLICATION_CONTROLLER_REPO_SERVER_TIMEOUT_SECONDS", 60, 0, math.MaxInt64), "Repo server RPC call timeout seconds.") 265 command.Flags().StringVar(&commitServerAddress, "commit-server", env.StringFromEnv("ARGOCD_APPLICATION_CONTROLLER_COMMIT_SERVER", common.DefaultCommitServerAddr), "Commit server address.") 266 command.Flags().IntVar(&statusProcessors, "status-processors", env.ParseNumFromEnv("ARGOCD_APPLICATION_CONTROLLER_STATUS_PROCESSORS", 20, 0, math.MaxInt32), "Number of application status processors") 267 command.Flags().IntVar(&operationProcessors, "operation-processors", env.ParseNumFromEnv("ARGOCD_APPLICATION_CONTROLLER_OPERATION_PROCESSORS", 10, 0, math.MaxInt32), "Number of application operation processors") 268 command.Flags().StringVar(&cmdutil.LogFormat, "logformat", env.StringFromEnv("ARGOCD_APPLICATION_CONTROLLER_LOGFORMAT", "json"), "Set the logging format. One of: json|text") 269 command.Flags().StringVar(&cmdutil.LogLevel, "loglevel", env.StringFromEnv("ARGOCD_APPLICATION_CONTROLLER_LOGLEVEL", "info"), "Set the logging level. One of: debug|info|warn|error") 270 command.Flags().IntVar(&glogLevel, "gloglevel", 0, "Set the glog logging level") 271 command.Flags().IntVar(&metricsPort, "metrics-port", common.DefaultPortArgoCDMetrics, "Start metrics server on given port") 272 command.Flags().DurationVar(&metricsCacheExpiration, "metrics-cache-expiration", env.ParseDurationFromEnv("ARGOCD_APPLICATION_CONTROLLER_METRICS_CACHE_EXPIRATION", 0*time.Second, 0, math.MaxInt64), "Prometheus metrics cache expiration (disabled by default. e.g. 24h0m0s)") 273 command.Flags().IntVar(&selfHealTimeoutSeconds, "self-heal-timeout-seconds", env.ParseNumFromEnv("ARGOCD_APPLICATION_CONTROLLER_SELF_HEAL_TIMEOUT_SECONDS", 0, 0, math.MaxInt32), "Specifies timeout between application self heal attempts") 274 command.Flags().IntVar(&selfHealBackoffTimeoutSeconds, "self-heal-backoff-timeout-seconds", env.ParseNumFromEnv("ARGOCD_APPLICATION_CONTROLLER_SELF_HEAL_BACKOFF_TIMEOUT_SECONDS", 2, 0, math.MaxInt32), "Specifies initial timeout of exponential backoff between self heal attempts") 275 command.Flags().IntVar(&selfHealBackoffFactor, "self-heal-backoff-factor", env.ParseNumFromEnv("ARGOCD_APPLICATION_CONTROLLER_SELF_HEAL_BACKOFF_FACTOR", 3, 0, math.MaxInt32), "Specifies factor of exponential timeout between application self heal attempts") 276 command.Flags().IntVar(&selfHealBackoffCapSeconds, "self-heal-backoff-cap-seconds", env.ParseNumFromEnv("ARGOCD_APPLICATION_CONTROLLER_SELF_HEAL_BACKOFF_CAP_SECONDS", 300, 0, math.MaxInt32), "Specifies max timeout of exponential backoff between application self heal attempts") 277 command.Flags().IntVar(&selfHealBackoffCooldownSeconds, "self-heal-backoff-cooldown-seconds", env.ParseNumFromEnv("ARGOCD_APPLICATION_CONTROLLER_SELF_HEAL_BACKOFF_COOLDOWN_SECONDS", 330, 0, math.MaxInt32), "Specifies period of time the app needs to stay synced before the self heal backoff can reset") 278 command.Flags().IntVar(&syncTimeout, "sync-timeout", env.ParseNumFromEnv("ARGOCD_APPLICATION_CONTROLLER_SYNC_TIMEOUT", 0, 0, math.MaxInt32), "Specifies the timeout after which a sync would be terminated. 0 means no timeout (default 0).") 279 command.Flags().Int64Var(&kubectlParallelismLimit, "kubectl-parallelism-limit", env.ParseInt64FromEnv("ARGOCD_APPLICATION_CONTROLLER_KUBECTL_PARALLELISM_LIMIT", 20, 0, math.MaxInt64), "Number of allowed concurrent kubectl fork/execs. Any value less than 1 means no limit.") 280 command.Flags().BoolVar(&repoServerPlaintext, "repo-server-plaintext", env.ParseBoolFromEnv("ARGOCD_APPLICATION_CONTROLLER_REPO_SERVER_PLAINTEXT", false), "Disable TLS on connections to repo server") 281 command.Flags().BoolVar(&repoServerStrictTLS, "repo-server-strict-tls", env.ParseBoolFromEnv("ARGOCD_APPLICATION_CONTROLLER_REPO_SERVER_STRICT_TLS", false), "Whether to use strict validation of the TLS cert presented by the repo server") 282 command.Flags().StringSliceVar(&metricsAplicationLabels, "metrics-application-labels", []string{}, "List of Application labels that will be added to the argocd_application_labels metric") 283 command.Flags().StringSliceVar(&metricsAplicationConditions, "metrics-application-conditions", []string{}, "List of Application conditions that will be added to the argocd_application_conditions metric") 284 command.Flags().StringSliceVar(&metricsClusterLabels, "metrics-cluster-labels", []string{}, "List of Cluster labels that will be added to the argocd_cluster_labels metric") 285 command.Flags().StringVar(&otlpAddress, "otlp-address", env.StringFromEnv("ARGOCD_APPLICATION_CONTROLLER_OTLP_ADDRESS", ""), "OpenTelemetry collector address to send traces to") 286 command.Flags().BoolVar(&otlpInsecure, "otlp-insecure", env.ParseBoolFromEnv("ARGOCD_APPLICATION_CONTROLLER_OTLP_INSECURE", true), "OpenTelemetry collector insecure mode") 287 command.Flags().StringToStringVar(&otlpHeaders, "otlp-headers", env.ParseStringToStringFromEnv("ARGOCD_APPLICATION_CONTROLLER_OTLP_HEADERS", map[string]string{}, ","), "List of OpenTelemetry collector extra headers sent with traces, headers are comma-separated key-value pairs(e.g. key1=value1,key2=value2)") 288 command.Flags().StringSliceVar(&otlpAttrs, "otlp-attrs", env.StringsFromEnv("ARGOCD_APPLICATION_CONTROLLER_OTLP_ATTRS", []string{}, ","), "List of OpenTelemetry collector extra attrs when send traces, each attribute is separated by a colon(e.g. key:value)") 289 command.Flags().StringSliceVar(&applicationNamespaces, "application-namespaces", env.StringsFromEnv("ARGOCD_APPLICATION_NAMESPACES", []string{}, ","), "List of additional namespaces that applications are allowed to be reconciled from") 290 command.Flags().BoolVar(&persistResourceHealth, "persist-resource-health", env.ParseBoolFromEnv("ARGOCD_APPLICATION_CONTROLLER_PERSIST_RESOURCE_HEALTH", false), "Enables storing the managed resources health in the Application CRD") 291 command.Flags().StringVar(&shardingAlgorithm, "sharding-method", env.StringFromEnv(common.EnvControllerShardingAlgorithm, common.DefaultShardingAlgorithm), "Enables choice of sharding method. Supported sharding methods are : [legacy, round-robin, consistent-hashing] ") 292 // global queue rate limit config 293 command.Flags().Int64Var(&workqueueRateLimit.BucketSize, "wq-bucket-size", env.ParseInt64FromEnv("WORKQUEUE_BUCKET_SIZE", 500, 1, math.MaxInt64), "Set Workqueue Rate Limiter Bucket Size, default 500") 294 command.Flags().Float64Var(&workqueueRateLimit.BucketQPS, "wq-bucket-qps", env.ParseFloat64FromEnv("WORKQUEUE_BUCKET_QPS", math.MaxFloat64, 1, math.MaxFloat64), "Set Workqueue Rate Limiter Bucket QPS, default set to MaxFloat64 which disables the bucket limiter") 295 // individual item rate limit config 296 // when WORKQUEUE_FAILURE_COOLDOWN is 0 per item rate limiting is disabled(default) 297 command.Flags().DurationVar(&workqueueRateLimit.FailureCoolDown, "wq-cooldown-ns", time.Duration(env.ParseInt64FromEnv("WORKQUEUE_FAILURE_COOLDOWN_NS", 0, 0, (24*time.Hour).Nanoseconds())), "Set Workqueue Per Item Rate Limiter Cooldown duration in ns, default 0(per item rate limiter disabled)") 298 command.Flags().DurationVar(&workqueueRateLimit.BaseDelay, "wq-basedelay-ns", time.Duration(env.ParseInt64FromEnv("WORKQUEUE_BASE_DELAY_NS", time.Millisecond.Nanoseconds(), time.Nanosecond.Nanoseconds(), (24*time.Hour).Nanoseconds())), "Set Workqueue Per Item Rate Limiter Base Delay duration in nanoseconds, default 1000000 (1ms)") 299 command.Flags().DurationVar(&workqueueRateLimit.MaxDelay, "wq-maxdelay-ns", time.Duration(env.ParseInt64FromEnv("WORKQUEUE_MAX_DELAY_NS", time.Second.Nanoseconds(), 1*time.Millisecond.Nanoseconds(), (24*time.Hour).Nanoseconds())), "Set Workqueue Per Item Rate Limiter Max Delay duration in nanoseconds, default 1000000000 (1s)") 300 command.Flags().Float64Var(&workqueueRateLimit.BackoffFactor, "wq-backoff-factor", env.ParseFloat64FromEnv("WORKQUEUE_BACKOFF_FACTOR", 1.5, 0, math.MaxFloat64), "Set Workqueue Per Item Rate Limiter Backoff Factor, default is 1.5") 301 command.Flags().BoolVar(&enableDynamicClusterDistribution, "dynamic-cluster-distribution-enabled", env.ParseBoolFromEnv(common.EnvEnableDynamicClusterDistribution, false), "Enables dynamic cluster distribution.") 302 command.Flags().BoolVar(&serverSideDiff, "server-side-diff-enabled", env.ParseBoolFromEnv(common.EnvServerSideDiff, false), "Feature flag to enable ServerSide diff. Default (\"false\")") 303 command.Flags().DurationVar(&ignoreNormalizerOpts.JQExecutionTimeout, "ignore-normalizer-jq-execution-timeout-seconds", env.ParseDurationFromEnv("ARGOCD_IGNORE_NORMALIZER_JQ_TIMEOUT", 0*time.Second, 0, math.MaxInt64), "Set ignore normalizer JQ execution timeout") 304 // argocd k8s event logging flag 305 command.Flags().StringSliceVar(&enableK8sEvent, "enable-k8s-event", env.StringsFromEnv("ARGOCD_ENABLE_K8S_EVENT", argo.DefaultEnableEventList(), ","), "Enable ArgoCD to use k8s event. For disabling all events, set the value as `none`. (e.g --enable-k8s-event=none), For enabling specific events, set the value as `event reason`. (e.g --enable-k8s-event=StatusRefreshed,ResourceCreated)") 306 command.Flags().BoolVar(&hydratorEnabled, "hydrator-enabled", env.ParseBoolFromEnv("ARGOCD_HYDRATOR_ENABLED", false), "Feature flag to enable Hydrator. Default (\"false\")") 307 cacheSource = appstatecache.AddCacheFlagsToCmd(&command, cacheutil.Options{ 308 OnClientCreated: func(client *redis.Client) { 309 redisClient = client 310 }, 311 }) 312 return &command 313 }