github.phpd.cn/cilium/cilium@v1.6.12/operator/main.go (about) 1 // Copyright 2018-2020 Authors of Cilium 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package main 16 17 import ( 18 "flag" 19 "fmt" 20 "os" 21 "os/signal" 22 "syscall" 23 "time" 24 25 "github.com/cilium/cilium/pkg/defaults" 26 "github.com/cilium/cilium/pkg/k8s" 27 clientset "github.com/cilium/cilium/pkg/k8s/client/clientset/versioned" 28 "github.com/cilium/cilium/pkg/k8s/types" 29 k8sversion "github.com/cilium/cilium/pkg/k8s/version" 30 "github.com/cilium/cilium/pkg/kvstore" 31 "github.com/cilium/cilium/pkg/logging" 32 "github.com/cilium/cilium/pkg/logging/logfields" 33 "github.com/cilium/cilium/pkg/option" 34 "github.com/cilium/cilium/pkg/version" 35 36 gops "github.com/google/gops/agent" 37 "github.com/sirupsen/logrus" 38 "github.com/spf13/cobra" 39 "github.com/spf13/cobra/doc" 40 "github.com/spf13/viper" 41 "google.golang.org/grpc" 42 "k8s.io/apimachinery/pkg/api/errors" 43 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 44 "k8s.io/klog" 45 ) 46 47 var ( 48 log = logging.DefaultLogger.WithField(logfields.LogSubsys, "cilium-operator") 49 50 rootCmd = &cobra.Command{ 51 Use: "cilium-operator", 52 Short: "Run the cilium-operator", 53 Run: func(cmd *cobra.Command, args []string) { 54 runOperator(cmd) 55 }, 56 } 57 58 k8sAPIServer string 59 k8sKubeConfigPath string 60 kvStore string 61 kvStoreOpts = make(map[string]string) 62 apiServerPort uint16 63 shutdownSignal = make(chan struct{}) 64 synchronizeServices bool 65 enableCepGC bool 66 synchronizeNodes bool 67 enableMetrics bool 68 metricsAddress string 69 eniParallelWorkers int64 70 enableENI bool 71 72 k8sIdentityGCInterval time.Duration 73 k8sIdentityHeartbeatTimeout time.Duration 74 ciliumK8sClient clientset.Interface 75 76 cmdRefDir string 77 ) 78 79 func main() { 80 signals := make(chan os.Signal, 1) 81 signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM) 82 83 go func() { 84 <-signals 85 gops.Close() 86 close(shutdownSignal) 87 }() 88 89 // Open socket for using gops to get stacktraces of the agent. 90 if err := gops.Listen(gops.Options{}); err != nil { 91 errorString := fmt.Sprintf("unable to start gops: %s", err) 92 fmt.Println(errorString) 93 os.Exit(-1) 94 } 95 96 if err := rootCmd.Execute(); err != nil { 97 fmt.Println(err) 98 os.Exit(-1) 99 } 100 } 101 102 func init() { 103 cobra.OnInitialize(initConfig) 104 105 flags := rootCmd.Flags() 106 flags.Bool("version", false, "Print version information") 107 flags.Int(option.ClusterIDName, 0, "Unique identifier of the cluster") 108 option.BindEnv(option.ClusterIDName) 109 flags.String(option.ClusterName, defaults.ClusterName, "Name of the cluster") 110 option.BindEnv(option.ClusterName) 111 flags.BoolP("debug", "D", false, "Enable debugging mode") 112 flags.StringVar(&k8sAPIServer, "k8s-api-server", "", "Kubernetes api address server (for https use --k8s-kubeconfig-path instead)") 113 flags.StringVar(&k8sKubeConfigPath, "k8s-kubeconfig-path", "", "Absolute path of the kubernetes kubeconfig file") 114 flags.String(option.KVStore, "", "Key-value store type") 115 option.BindEnv(option.KVStore) 116 flags.Var(option.NewNamedMapOptions(option.KVStoreOpt, &kvStoreOpts, nil), option.KVStoreOpt, "Key-value store options") 117 option.BindEnv(option.KVStoreOpt) 118 flags.Uint16Var(&apiServerPort, "api-server-port", 9234, "Port on which the operator should serve API requests") 119 flags.String(option.IPAM, "", "Backend to use for IPAM") 120 option.BindEnv(option.IPAM) 121 flags.Bool(option.AwsReleaseExcessIps, false, "Enable releasing excess free IP addresses from AWS ENI.") 122 option.BindEnv(option.AwsReleaseExcessIps) 123 flags.BoolVar(&enableMetrics, "enable-metrics", false, "Enable Prometheus metrics") 124 flags.StringVar(&metricsAddress, "metrics-address", ":6942", "Address to serve Prometheus metrics") 125 flags.BoolVar(&synchronizeServices, "synchronize-k8s-services", true, "Synchronize Kubernetes services to kvstore") 126 flags.BoolVar(&synchronizeNodes, "synchronize-k8s-nodes", true, "Synchronize Kubernetes nodes to kvstore and perform CNP GC") 127 flags.DurationVar(&k8sIdentityHeartbeatTimeout, "identity-heartbeat-timeout", 15*time.Minute, "Timeout after which identity expires on lack of heartbeat") 128 flags.BoolVar(&enableCepGC, "cilium-endpoint-gc", true, "Enable CiliumEndpoint garbage collector") 129 flags.DurationVar(&ciliumEndpointGCInterval, "cilium-endpoint-gc-interval", time.Minute*30, "GC interval for cilium endpoints") 130 flags.StringVar(&identityAllocationMode, option.IdentityAllocationMode, option.IdentityAllocationModeKVstore, "Method to use for identity allocation") 131 option.BindEnv(option.IdentityAllocationMode) 132 flags.DurationVar(&identityGCInterval, "identity-gc-interval", defaults.KVstoreLeaseTTL, "GC interval for security identities") 133 flags.DurationVar(&kvNodeGCInterval, "nodes-gc-interval", time.Minute*2, "GC interval for nodes store in the kvstore") 134 flags.Int64Var(&eniParallelWorkers, "eni-parallel-workers", 50, "Maximum number of parallel workers used by ENI allocator") 135 flags.String(option.K8sNamespaceName, "", "Name of the Kubernetes namespace in which Cilium Operator is deployed in") 136 flags.MarkHidden(option.K8sNamespaceName) 137 option.BindEnv(option.K8sNamespaceName) 138 139 flags.IntVar(&unmanagedKubeDnsWatcherInterval, "unmanaged-pod-watcher-interval", 15, "Interval to check for unmanaged kube-dns pods (0 to disable)") 140 141 flags.Int(option.AWSClientBurst, 4, "Burst value allowed for the AWS client used by the AWS ENI IPAM") 142 flags.Float64(option.AWSClientQPSLimit, 20.0, "Queries per second limit for the AWS client used by the AWS ENI IPAM") 143 144 flags.Float32(option.K8sClientQPSLimit, defaults.K8sClientQPSLimit, "Queries per second limit for the K8s client") 145 flags.Int(option.K8sClientBurst, defaults.K8sClientBurst, "Burst value allowed for the K8s client") 146 147 // We need to obtain from Cilium ConfigMap if the CiliumEndpointCRD option 148 // is enabled or disabled. This option is marked as hidden because the 149 // Cilium Endpoint CRD controller is not in this program and by having it 150 // being printed by operator --help could confuse users. 151 flags.Bool(option.DisableCiliumEndpointCRDName, false, "") 152 flags.MarkHidden(option.DisableCiliumEndpointCRDName) 153 option.BindEnv(option.DisableCiliumEndpointCRDName) 154 155 flags.BoolVar(&enableCNPNodeStatusGC, "cnp-node-status-gc", true, "Enable CiliumNetworkPolicy Status garbage collection for nodes which have been removed from the cluster") 156 flags.DurationVar(&ciliumCNPNodeStatusGCInterval, "cnp-node-status-gc-interval", time.Minute*2, "GC interval for nodes which have been removed from the cluster in CiliumNetworkPolicy Status") 157 158 flags.StringVar(&cmdRefDir, "cmdref", "", "Path to cmdref output directory") 159 flags.MarkHidden("cmdref") 160 viper.BindPFlags(flags) 161 162 // Make sure that klog logging variables are initialized so that we can 163 // update them from this file. 164 klog.InitFlags(nil) 165 166 // Make sure klog (used by the client-go dependency) logs to stderr, as it 167 // will try to log to directories that may not exist in the cilium-operator 168 // container (/tmp) and cause the cilium-operator to exit. 169 flag.Set("logtostderr", "true") 170 } 171 172 // initConfig reads in config file and ENV variables if set. 173 func initConfig() { 174 if viper.GetBool("version") { 175 fmt.Printf("Cilium %s\n", version.Version) 176 os.Exit(0) 177 } 178 179 option.Config.ClusterName = viper.GetString(option.ClusterName) 180 option.Config.ClusterID = viper.GetInt(option.ClusterIDName) 181 option.Config.DisableCiliumEndpointCRD = viper.GetBool(option.DisableCiliumEndpointCRDName) 182 option.Config.K8sNamespace = viper.GetString(option.K8sNamespaceName) 183 option.Config.AwsReleaseExcessIps = viper.GetBool(option.AwsReleaseExcessIps) 184 185 viper.SetEnvPrefix("cilium") 186 viper.SetConfigName("cilium-operator") 187 } 188 189 func kvstoreEnabled() bool { 190 if kvStore == "" { 191 return false 192 } 193 194 return identityAllocationMode == option.IdentityAllocationModeKVstore || 195 synchronizeServices || 196 synchronizeNodes 197 } 198 199 func getAPIServerAddr() []string { 200 return []string{fmt.Sprintf("127.0.0.1:%d", apiServerPort), fmt.Sprintf("[::1]:%d", apiServerPort)} 201 } 202 203 func runOperator(cmd *cobra.Command) { 204 logging.SetupLogging([]string{}, map[string]string{}, "cilium-operator", viper.GetBool("debug")) 205 206 if cmdRefDir != "" { 207 // Remove the line 'Auto generated by spf13/cobra on ...' 208 cmd.DisableAutoGenTag = true 209 if err := doc.GenMarkdownTreeCustom(cmd, cmdRefDir, filePrepend, linkHandler); err != nil { 210 log.Fatal(err) 211 } 212 os.Exit(0) 213 } 214 215 log.Infof("Cilium Operator %s", version.Version) 216 k8sInitDone := make(chan struct{}) 217 go startServer(shutdownSignal, k8sInitDone, getAPIServerAddr()...) 218 219 if enableMetrics { 220 registerMetrics() 221 } 222 223 k8sClientQPSLimit := viper.GetFloat64(option.K8sClientQPSLimit) 224 k8sClientBurst := viper.GetInt(option.K8sClientBurst) 225 kvStore = viper.GetString(option.KVStore) 226 if m := viper.GetStringMapString(option.KVStoreOpt); len(m) > 0 { 227 kvStoreOpts = m 228 } 229 230 k8s.Configure(k8sAPIServer, k8sKubeConfigPath, float32(k8sClientQPSLimit), k8sClientBurst) 231 if err := k8s.Init(); err != nil { 232 log.WithError(err).Fatal("Unable to connect to Kubernetes apiserver") 233 } 234 close(k8sInitDone) 235 236 ciliumK8sClient = k8s.CiliumClient() 237 k8sversion.Update(k8s.Client()) 238 if !k8sversion.Capabilities().MinimalVersionMet { 239 log.Fatalf("Minimal kubernetes version not met: %s < %s", 240 k8sversion.Version(), k8sversion.MinimalVersionConstraint) 241 } 242 243 // Restart kube-dns as soon as possible since it helps etcd-operator to be 244 // properly setup. If kube-dns is not managed by Cilium it can prevent 245 // etcd from reaching out kube-dns in EKS. 246 if option.Config.DisableCiliumEndpointCRD { 247 log.Infof("KubeDNS unmanaged pods controller disabled as %q option is set to 'disabled' in Cilium ConfigMap", option.DisableCiliumEndpointCRDName) 248 } else if unmanagedKubeDnsWatcherInterval != 0 { 249 enableUnmanagedKubeDNSController() 250 } 251 252 enableENI = viper.GetString(option.IPAM) == option.IPAMENI 253 if enableENI { 254 awsClientQPSLimit := viper.GetFloat64(option.AWSClientQPSLimit) 255 awsClientBurst := viper.GetInt(option.AWSClientBurst) 256 if err := startENIAllocator(awsClientQPSLimit, awsClientBurst); err != nil { 257 log.WithError(err).Fatal("Unable to start ENI allocator") 258 } 259 } 260 261 if enableENI { 262 startSynchronizingCiliumNodes() 263 } 264 265 if kvstoreEnabled() { 266 if synchronizeServices { 267 startSynchronizingServices() 268 } 269 270 var goopts *kvstore.ExtraOptions 271 scopedLog := log.WithFields(logrus.Fields{ 272 "kvstore": kvStore, 273 "address": kvStoreOpts[fmt.Sprintf("%s.address", kvStore)], 274 }) 275 if synchronizeServices { 276 // If K8s is enabled we can do the service translation automagically by 277 // looking at services from k8s and retrieve the service IP from that. 278 // This makes cilium to not depend on kube dns to interact with etcd 279 if k8s.IsEnabled() { 280 svcURL, isETCDOperator := kvstore.IsEtcdOperator(kvStore, kvStoreOpts, option.Config.K8sNamespace) 281 if isETCDOperator { 282 scopedLog.Info("cilium-operator running with service synchronization: automatic etcd service translation enabled") 283 284 svcGetter := k8s.ServiceIPGetter(&k8sSvcCache) 285 286 name, namespace, err := kvstore.SplitK8sServiceURL(svcURL) 287 if err != nil { 288 // If we couldn't derive the name/namespace for the given 289 // svcURL log the error so the user can see it. 290 // k8s.CreateCustomDialer won't be able to derive 291 // the name/namespace as well so it does not matter that 292 // we wait for all services to be synchronized with k8s. 293 scopedLog.WithError(err).WithFields(logrus.Fields{ 294 "url": svcURL, 295 }).Error("Unable to derive service name from given url") 296 } else { 297 scopedLog.WithFields(logrus.Fields{ 298 logfields.ServiceName: name, 299 logfields.ServiceNamespace: namespace, 300 }).Info("Retrieving service spec from k8s to perform automatic etcd service translation") 301 k8sSvc, err := k8s.Client().CoreV1().Services(namespace).Get(name, metav1.GetOptions{}) 302 switch { 303 case err == nil: 304 // Create another service cache that contains the 305 // k8s service for etcd. As soon the k8s caches are 306 // synced, this hijack will stop happening. 307 sc := k8s.NewServiceCache() 308 sc.UpdateService(&types.Service{Service: k8sSvc}) 309 svcGetter = &serviceGetter{ 310 shortCutK8sCache: &sc, 311 k8sCache: &k8sSvcCache, 312 } 313 break 314 case errors.IsNotFound(err): 315 scopedLog.Error("Service not found in k8s") 316 default: 317 scopedLog.Warning("Unable to get service spec from k8s, this might cause network disruptions with etcd") 318 } 319 } 320 321 log := log.WithField(logfields.LogSubsys, "etcd") 322 goopts = &kvstore.ExtraOptions{ 323 DialOption: []grpc.DialOption{ 324 grpc.WithDialer(k8s.CreateCustomDialer(svcGetter, log)), 325 }, 326 } 327 } 328 } 329 } else { 330 scopedLog.Info("cilium-operator running without service synchronization: automatic etcd service translation disabled") 331 } 332 scopedLog.Info("Connecting to kvstore...") 333 if err := kvstore.Setup(kvStore, kvStoreOpts, goopts); err != nil { 334 scopedLog.WithError(err).Fatal("Unable to setup kvstore") 335 } 336 337 if synchronizeNodes { 338 if err := runNodeWatcher(); err != nil { 339 log.WithError(err).Error("Unable to setup node watcher") 340 } 341 } 342 343 startKvstoreWatchdog() 344 } 345 346 switch identityAllocationMode { 347 case option.IdentityAllocationModeCRD: 348 if !k8s.IsEnabled() { 349 log.Fatal("CRD Identity allocation mode requires k8s to be configured.") 350 } 351 352 startManagingK8sIdentities() 353 354 if identityGCInterval != time.Duration(0) { 355 go startCRDIdentityGC() 356 } 357 } 358 359 if enableCepGC { 360 enableCiliumEndpointSyncGC() 361 } 362 363 if identityGCInterval != time.Duration(0) { 364 startIdentityGC() 365 } 366 err := enableCNPWatcher() 367 if err != nil { 368 log.WithError(err).WithField("subsys", "CNPWatcher").Fatal( 369 "Cannot connect to Kubernetes apiserver ") 370 } 371 372 log.Info("Initialization complete") 373 374 <-shutdownSignal 375 // graceful exit 376 log.Info("Received termination signal. Shutting down") 377 return 378 }