github.phpd.cn/cilium/cilium@v1.6.12/operator/main.go (about)

     1  // Copyright 2018-2020 Authors of Cilium
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package main
    16  
    17  import (
    18  	"flag"
    19  	"fmt"
    20  	"os"
    21  	"os/signal"
    22  	"syscall"
    23  	"time"
    24  
    25  	"github.com/cilium/cilium/pkg/defaults"
    26  	"github.com/cilium/cilium/pkg/k8s"
    27  	clientset "github.com/cilium/cilium/pkg/k8s/client/clientset/versioned"
    28  	"github.com/cilium/cilium/pkg/k8s/types"
    29  	k8sversion "github.com/cilium/cilium/pkg/k8s/version"
    30  	"github.com/cilium/cilium/pkg/kvstore"
    31  	"github.com/cilium/cilium/pkg/logging"
    32  	"github.com/cilium/cilium/pkg/logging/logfields"
    33  	"github.com/cilium/cilium/pkg/option"
    34  	"github.com/cilium/cilium/pkg/version"
    35  
    36  	gops "github.com/google/gops/agent"
    37  	"github.com/sirupsen/logrus"
    38  	"github.com/spf13/cobra"
    39  	"github.com/spf13/cobra/doc"
    40  	"github.com/spf13/viper"
    41  	"google.golang.org/grpc"
    42  	"k8s.io/apimachinery/pkg/api/errors"
    43  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    44  	"k8s.io/klog"
    45  )
    46  
    47  var (
    48  	log = logging.DefaultLogger.WithField(logfields.LogSubsys, "cilium-operator")
    49  
    50  	rootCmd = &cobra.Command{
    51  		Use:   "cilium-operator",
    52  		Short: "Run the cilium-operator",
    53  		Run: func(cmd *cobra.Command, args []string) {
    54  			runOperator(cmd)
    55  		},
    56  	}
    57  
    58  	k8sAPIServer        string
    59  	k8sKubeConfigPath   string
    60  	kvStore             string
    61  	kvStoreOpts         = make(map[string]string)
    62  	apiServerPort       uint16
    63  	shutdownSignal      = make(chan struct{})
    64  	synchronizeServices bool
    65  	enableCepGC         bool
    66  	synchronizeNodes    bool
    67  	enableMetrics       bool
    68  	metricsAddress      string
    69  	eniParallelWorkers  int64
    70  	enableENI           bool
    71  
    72  	k8sIdentityGCInterval       time.Duration
    73  	k8sIdentityHeartbeatTimeout time.Duration
    74  	ciliumK8sClient             clientset.Interface
    75  
    76  	cmdRefDir string
    77  )
    78  
    79  func main() {
    80  	signals := make(chan os.Signal, 1)
    81  	signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM)
    82  
    83  	go func() {
    84  		<-signals
    85  		gops.Close()
    86  		close(shutdownSignal)
    87  	}()
    88  
    89  	// Open socket for using gops to get stacktraces of the agent.
    90  	if err := gops.Listen(gops.Options{}); err != nil {
    91  		errorString := fmt.Sprintf("unable to start gops: %s", err)
    92  		fmt.Println(errorString)
    93  		os.Exit(-1)
    94  	}
    95  
    96  	if err := rootCmd.Execute(); err != nil {
    97  		fmt.Println(err)
    98  		os.Exit(-1)
    99  	}
   100  }
   101  
   102  func init() {
   103  	cobra.OnInitialize(initConfig)
   104  
   105  	flags := rootCmd.Flags()
   106  	flags.Bool("version", false, "Print version information")
   107  	flags.Int(option.ClusterIDName, 0, "Unique identifier of the cluster")
   108  	option.BindEnv(option.ClusterIDName)
   109  	flags.String(option.ClusterName, defaults.ClusterName, "Name of the cluster")
   110  	option.BindEnv(option.ClusterName)
   111  	flags.BoolP("debug", "D", false, "Enable debugging mode")
   112  	flags.StringVar(&k8sAPIServer, "k8s-api-server", "", "Kubernetes api address server (for https use --k8s-kubeconfig-path instead)")
   113  	flags.StringVar(&k8sKubeConfigPath, "k8s-kubeconfig-path", "", "Absolute path of the kubernetes kubeconfig file")
   114  	flags.String(option.KVStore, "", "Key-value store type")
   115  	option.BindEnv(option.KVStore)
   116  	flags.Var(option.NewNamedMapOptions(option.KVStoreOpt, &kvStoreOpts, nil), option.KVStoreOpt, "Key-value store options")
   117  	option.BindEnv(option.KVStoreOpt)
   118  	flags.Uint16Var(&apiServerPort, "api-server-port", 9234, "Port on which the operator should serve API requests")
   119  	flags.String(option.IPAM, "", "Backend to use for IPAM")
   120  	option.BindEnv(option.IPAM)
   121  	flags.Bool(option.AwsReleaseExcessIps, false, "Enable releasing excess free IP addresses from AWS ENI.")
   122  	option.BindEnv(option.AwsReleaseExcessIps)
   123  	flags.BoolVar(&enableMetrics, "enable-metrics", false, "Enable Prometheus metrics")
   124  	flags.StringVar(&metricsAddress, "metrics-address", ":6942", "Address to serve Prometheus metrics")
   125  	flags.BoolVar(&synchronizeServices, "synchronize-k8s-services", true, "Synchronize Kubernetes services to kvstore")
   126  	flags.BoolVar(&synchronizeNodes, "synchronize-k8s-nodes", true, "Synchronize Kubernetes nodes to kvstore and perform CNP GC")
   127  	flags.DurationVar(&k8sIdentityHeartbeatTimeout, "identity-heartbeat-timeout", 15*time.Minute, "Timeout after which identity expires on lack of heartbeat")
   128  	flags.BoolVar(&enableCepGC, "cilium-endpoint-gc", true, "Enable CiliumEndpoint garbage collector")
   129  	flags.DurationVar(&ciliumEndpointGCInterval, "cilium-endpoint-gc-interval", time.Minute*30, "GC interval for cilium endpoints")
   130  	flags.StringVar(&identityAllocationMode, option.IdentityAllocationMode, option.IdentityAllocationModeKVstore, "Method to use for identity allocation")
   131  	option.BindEnv(option.IdentityAllocationMode)
   132  	flags.DurationVar(&identityGCInterval, "identity-gc-interval", defaults.KVstoreLeaseTTL, "GC interval for security identities")
   133  	flags.DurationVar(&kvNodeGCInterval, "nodes-gc-interval", time.Minute*2, "GC interval for nodes store in the kvstore")
   134  	flags.Int64Var(&eniParallelWorkers, "eni-parallel-workers", 50, "Maximum number of parallel workers used by ENI allocator")
   135  	flags.String(option.K8sNamespaceName, "", "Name of the Kubernetes namespace in which Cilium Operator is deployed in")
   136  	flags.MarkHidden(option.K8sNamespaceName)
   137  	option.BindEnv(option.K8sNamespaceName)
   138  
   139  	flags.IntVar(&unmanagedKubeDnsWatcherInterval, "unmanaged-pod-watcher-interval", 15, "Interval to check for unmanaged kube-dns pods (0 to disable)")
   140  
   141  	flags.Int(option.AWSClientBurst, 4, "Burst value allowed for the AWS client used by the AWS ENI IPAM")
   142  	flags.Float64(option.AWSClientQPSLimit, 20.0, "Queries per second limit for the AWS client used by the AWS ENI IPAM")
   143  
   144  	flags.Float32(option.K8sClientQPSLimit, defaults.K8sClientQPSLimit, "Queries per second limit for the K8s client")
   145  	flags.Int(option.K8sClientBurst, defaults.K8sClientBurst, "Burst value allowed for the K8s client")
   146  
   147  	// We need to obtain from Cilium ConfigMap if the CiliumEndpointCRD option
   148  	// is enabled or disabled. This option is marked as hidden because the
   149  	// Cilium Endpoint CRD controller is not in this program and by having it
   150  	// being printed by operator --help could confuse users.
   151  	flags.Bool(option.DisableCiliumEndpointCRDName, false, "")
   152  	flags.MarkHidden(option.DisableCiliumEndpointCRDName)
   153  	option.BindEnv(option.DisableCiliumEndpointCRDName)
   154  
   155  	flags.BoolVar(&enableCNPNodeStatusGC, "cnp-node-status-gc", true, "Enable CiliumNetworkPolicy Status garbage collection for nodes which have been removed from the cluster")
   156  	flags.DurationVar(&ciliumCNPNodeStatusGCInterval, "cnp-node-status-gc-interval", time.Minute*2, "GC interval for nodes which have been removed from the cluster in CiliumNetworkPolicy Status")
   157  
   158  	flags.StringVar(&cmdRefDir, "cmdref", "", "Path to cmdref output directory")
   159  	flags.MarkHidden("cmdref")
   160  	viper.BindPFlags(flags)
   161  
   162  	// Make sure that klog logging variables are initialized so that we can
   163  	// update them from this file.
   164  	klog.InitFlags(nil)
   165  
   166  	// Make sure klog (used by the client-go dependency) logs to stderr, as it
   167  	// will try to log to directories that may not exist in the cilium-operator
   168  	// container (/tmp) and cause the cilium-operator to exit.
   169  	flag.Set("logtostderr", "true")
   170  }
   171  
   172  // initConfig reads in config file and ENV variables if set.
   173  func initConfig() {
   174  	if viper.GetBool("version") {
   175  		fmt.Printf("Cilium %s\n", version.Version)
   176  		os.Exit(0)
   177  	}
   178  
   179  	option.Config.ClusterName = viper.GetString(option.ClusterName)
   180  	option.Config.ClusterID = viper.GetInt(option.ClusterIDName)
   181  	option.Config.DisableCiliumEndpointCRD = viper.GetBool(option.DisableCiliumEndpointCRDName)
   182  	option.Config.K8sNamespace = viper.GetString(option.K8sNamespaceName)
   183  	option.Config.AwsReleaseExcessIps = viper.GetBool(option.AwsReleaseExcessIps)
   184  
   185  	viper.SetEnvPrefix("cilium")
   186  	viper.SetConfigName("cilium-operator")
   187  }
   188  
   189  func kvstoreEnabled() bool {
   190  	if kvStore == "" {
   191  		return false
   192  	}
   193  
   194  	return identityAllocationMode == option.IdentityAllocationModeKVstore ||
   195  		synchronizeServices ||
   196  		synchronizeNodes
   197  }
   198  
   199  func getAPIServerAddr() []string {
   200  	return []string{fmt.Sprintf("127.0.0.1:%d", apiServerPort), fmt.Sprintf("[::1]:%d", apiServerPort)}
   201  }
   202  
   203  func runOperator(cmd *cobra.Command) {
   204  	logging.SetupLogging([]string{}, map[string]string{}, "cilium-operator", viper.GetBool("debug"))
   205  
   206  	if cmdRefDir != "" {
   207  		// Remove the line 'Auto generated by spf13/cobra on ...'
   208  		cmd.DisableAutoGenTag = true
   209  		if err := doc.GenMarkdownTreeCustom(cmd, cmdRefDir, filePrepend, linkHandler); err != nil {
   210  			log.Fatal(err)
   211  		}
   212  		os.Exit(0)
   213  	}
   214  
   215  	log.Infof("Cilium Operator %s", version.Version)
   216  	k8sInitDone := make(chan struct{})
   217  	go startServer(shutdownSignal, k8sInitDone, getAPIServerAddr()...)
   218  
   219  	if enableMetrics {
   220  		registerMetrics()
   221  	}
   222  
   223  	k8sClientQPSLimit := viper.GetFloat64(option.K8sClientQPSLimit)
   224  	k8sClientBurst := viper.GetInt(option.K8sClientBurst)
   225  	kvStore = viper.GetString(option.KVStore)
   226  	if m := viper.GetStringMapString(option.KVStoreOpt); len(m) > 0 {
   227  		kvStoreOpts = m
   228  	}
   229  
   230  	k8s.Configure(k8sAPIServer, k8sKubeConfigPath, float32(k8sClientQPSLimit), k8sClientBurst)
   231  	if err := k8s.Init(); err != nil {
   232  		log.WithError(err).Fatal("Unable to connect to Kubernetes apiserver")
   233  	}
   234  	close(k8sInitDone)
   235  
   236  	ciliumK8sClient = k8s.CiliumClient()
   237  	k8sversion.Update(k8s.Client())
   238  	if !k8sversion.Capabilities().MinimalVersionMet {
   239  		log.Fatalf("Minimal kubernetes version not met: %s < %s",
   240  			k8sversion.Version(), k8sversion.MinimalVersionConstraint)
   241  	}
   242  
   243  	// Restart kube-dns as soon as possible since it helps etcd-operator to be
   244  	// properly setup. If kube-dns is not managed by Cilium it can prevent
   245  	// etcd from reaching out kube-dns in EKS.
   246  	if option.Config.DisableCiliumEndpointCRD {
   247  		log.Infof("KubeDNS unmanaged pods controller disabled as %q option is set to 'disabled' in Cilium ConfigMap", option.DisableCiliumEndpointCRDName)
   248  	} else if unmanagedKubeDnsWatcherInterval != 0 {
   249  		enableUnmanagedKubeDNSController()
   250  	}
   251  
   252  	enableENI = viper.GetString(option.IPAM) == option.IPAMENI
   253  	if enableENI {
   254  		awsClientQPSLimit := viper.GetFloat64(option.AWSClientQPSLimit)
   255  		awsClientBurst := viper.GetInt(option.AWSClientBurst)
   256  		if err := startENIAllocator(awsClientQPSLimit, awsClientBurst); err != nil {
   257  			log.WithError(err).Fatal("Unable to start ENI allocator")
   258  		}
   259  	}
   260  
   261  	if enableENI {
   262  		startSynchronizingCiliumNodes()
   263  	}
   264  
   265  	if kvstoreEnabled() {
   266  		if synchronizeServices {
   267  			startSynchronizingServices()
   268  		}
   269  
   270  		var goopts *kvstore.ExtraOptions
   271  		scopedLog := log.WithFields(logrus.Fields{
   272  			"kvstore": kvStore,
   273  			"address": kvStoreOpts[fmt.Sprintf("%s.address", kvStore)],
   274  		})
   275  		if synchronizeServices {
   276  			// If K8s is enabled we can do the service translation automagically by
   277  			// looking at services from k8s and retrieve the service IP from that.
   278  			// This makes cilium to not depend on kube dns to interact with etcd
   279  			if k8s.IsEnabled() {
   280  				svcURL, isETCDOperator := kvstore.IsEtcdOperator(kvStore, kvStoreOpts, option.Config.K8sNamespace)
   281  				if isETCDOperator {
   282  					scopedLog.Info("cilium-operator running with service synchronization: automatic etcd service translation enabled")
   283  
   284  					svcGetter := k8s.ServiceIPGetter(&k8sSvcCache)
   285  
   286  					name, namespace, err := kvstore.SplitK8sServiceURL(svcURL)
   287  					if err != nil {
   288  						// If we couldn't derive the name/namespace for the given
   289  						// svcURL log the error so the user can see it.
   290  						// k8s.CreateCustomDialer won't be able to derive
   291  						// the name/namespace as well so it does not matter that
   292  						// we wait for all services to be synchronized with k8s.
   293  						scopedLog.WithError(err).WithFields(logrus.Fields{
   294  							"url": svcURL,
   295  						}).Error("Unable to derive service name from given url")
   296  					} else {
   297  						scopedLog.WithFields(logrus.Fields{
   298  							logfields.ServiceName:      name,
   299  							logfields.ServiceNamespace: namespace,
   300  						}).Info("Retrieving service spec from k8s to perform automatic etcd service translation")
   301  						k8sSvc, err := k8s.Client().CoreV1().Services(namespace).Get(name, metav1.GetOptions{})
   302  						switch {
   303  						case err == nil:
   304  							// Create another service cache that contains the
   305  							// k8s service for etcd. As soon the k8s caches are
   306  							// synced, this hijack will stop happening.
   307  							sc := k8s.NewServiceCache()
   308  							sc.UpdateService(&types.Service{Service: k8sSvc})
   309  							svcGetter = &serviceGetter{
   310  								shortCutK8sCache: &sc,
   311  								k8sCache:         &k8sSvcCache,
   312  							}
   313  							break
   314  						case errors.IsNotFound(err):
   315  							scopedLog.Error("Service not found in k8s")
   316  						default:
   317  							scopedLog.Warning("Unable to get service spec from k8s, this might cause network disruptions with etcd")
   318  						}
   319  					}
   320  
   321  					log := log.WithField(logfields.LogSubsys, "etcd")
   322  					goopts = &kvstore.ExtraOptions{
   323  						DialOption: []grpc.DialOption{
   324  							grpc.WithDialer(k8s.CreateCustomDialer(svcGetter, log)),
   325  						},
   326  					}
   327  				}
   328  			}
   329  		} else {
   330  			scopedLog.Info("cilium-operator running without service synchronization: automatic etcd service translation disabled")
   331  		}
   332  		scopedLog.Info("Connecting to kvstore...")
   333  		if err := kvstore.Setup(kvStore, kvStoreOpts, goopts); err != nil {
   334  			scopedLog.WithError(err).Fatal("Unable to setup kvstore")
   335  		}
   336  
   337  		if synchronizeNodes {
   338  			if err := runNodeWatcher(); err != nil {
   339  				log.WithError(err).Error("Unable to setup node watcher")
   340  			}
   341  		}
   342  
   343  		startKvstoreWatchdog()
   344  	}
   345  
   346  	switch identityAllocationMode {
   347  	case option.IdentityAllocationModeCRD:
   348  		if !k8s.IsEnabled() {
   349  			log.Fatal("CRD Identity allocation mode requires k8s to be configured.")
   350  		}
   351  
   352  		startManagingK8sIdentities()
   353  
   354  		if identityGCInterval != time.Duration(0) {
   355  			go startCRDIdentityGC()
   356  		}
   357  	}
   358  
   359  	if enableCepGC {
   360  		enableCiliumEndpointSyncGC()
   361  	}
   362  
   363  	if identityGCInterval != time.Duration(0) {
   364  		startIdentityGC()
   365  	}
   366  	err := enableCNPWatcher()
   367  	if err != nil {
   368  		log.WithError(err).WithField("subsys", "CNPWatcher").Fatal(
   369  			"Cannot connect to Kubernetes apiserver ")
   370  	}
   371  
   372  	log.Info("Initialization complete")
   373  
   374  	<-shutdownSignal
   375  	// graceful exit
   376  	log.Info("Received termination signal. Shutting down")
   377  	return
   378  }