github.com/verrazzano/verrazzano@v1.7.1/cluster-operator/internal/operatorinit/run_operator.go (about)

     1  // Copyright (c) 2022, 2023, Oracle and/or its affiliates.
     2  // Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl.
     3  
     4  package operatorinit
     5  
     6  import (
     7  	"context"
     8  	"github.com/pkg/errors"
     9  	"github.com/prometheus/client_golang/prometheus/promhttp"
    10  	"github.com/verrazzano/verrazzano/cluster-operator/controllers/capi"
    11  	"github.com/verrazzano/verrazzano/cluster-operator/controllers/quickcreate/controller"
    12  	"github.com/verrazzano/verrazzano/cluster-operator/controllers/quickcreate/controller/oci"
    13  	"github.com/verrazzano/verrazzano/cluster-operator/controllers/quickcreate/ociocne"
    14  	"github.com/verrazzano/verrazzano/cluster-operator/controllers/quickcreate/oke"
    15  	"github.com/verrazzano/verrazzano/cluster-operator/controllers/rancher"
    16  	"github.com/verrazzano/verrazzano/cluster-operator/controllers/vmc"
    17  	"github.com/verrazzano/verrazzano/pkg/k8sutil"
    18  	"github.com/verrazzano/verrazzano/pkg/log/vzlog"
    19  	"github.com/verrazzano/verrazzano/pkg/nginxutil"
    20  	"github.com/verrazzano/verrazzano/pkg/rancherutil"
    21  	"go.uber.org/zap"
    22  	apiextv1 "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset/typed/apiextensions/v1"
    23  	k8serrors "k8s.io/apimachinery/pkg/api/errors"
    24  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    25  	"k8s.io/apimachinery/pkg/runtime"
    26  	"net/http"
    27  	"os"
    28  	ctrl "sigs.k8s.io/controller-runtime"
    29  	"sigs.k8s.io/controller-runtime/pkg/healthz"
    30  	"sigs.k8s.io/yaml"
    31  	"strings"
    32  	"time"
    33  )
    34  
// File, environment variable, and CRD names consulted while starting the operator.
const (
	clusterSelectorFilePath = "/var/syncClusters/selector.yaml" // optional YAML label selector passed to shouldSyncClusters
	syncClustersEnvVarName  = "CLUSTER_SYNC_ENABLED"            // env var; "true" (any case) enables Rancher cluster sync
	cattleClustersCRDName   = "clusters.management.cattle.io"   // CRD whose presence gates the Rancher cluster sync controller
	capiClustersCRDName     = "clusters.cluster.x-k8s.io"       // CRD whose presence gates the CAPI cluster controller
)
    41  
// Properties carries the settings that StartClusterOperator uses to configure the
// controller manager and to decide which controllers get registered.
type Properties struct {
	Scheme                         *runtime.Scheme // scheme handed to the controller manager
	CertificateDir                 string          // not referenced in the visible code — TODO confirm where it is consumed
	MetricsAddress                 string          // used as the manager's MetricsBindAddress
	ProbeAddress                   string          // used as the manager's HealthProbeBindAddress
	IngressHost                    string          // Rancher ingress host; StartClusterOperator fills in a default when empty
	EnableLeaderElection           bool            // enables leader election on the manager
	EnableQuickCreate              bool            // registers the ociocne and oke quick-create reconcilers
	DisableCAPIRancherRegistration bool            // when true, the CAPI cluster controller is not registered
}
    52  
    53  // StartClusterOperator Cluster operator execution entry point
    54  func StartClusterOperator(log *zap.SugaredLogger, props Properties) error {
    55  	options := ctrl.Options{
    56  		Scheme:                 props.Scheme,
    57  		MetricsBindAddress:     props.MetricsAddress,
    58  		Port:                   9443,
    59  		HealthProbeBindAddress: props.ProbeAddress,
    60  		LeaderElection:         props.EnableLeaderElection,
    61  		LeaderElectionID:       "42d5ea87.verrazzano.io",
    62  	}
    63  
    64  	ingressNGINXNamespace, err := nginxutil.DetermineNamespaceForIngressNGINX(vzlog.DefaultLogger())
    65  	if err != nil {
    66  		return err
    67  	}
    68  	nginxutil.SetIngressNGINXNamespace(ingressNGINXNamespace)
    69  
    70  	ctrlConfig := k8sutil.GetConfigOrDieFromController()
    71  	mgr, err := ctrl.NewManager(ctrlConfig, options)
    72  	if err != nil {
    73  		return errors.Wrapf(err, "Failed to setup controller manager")
    74  	}
    75  
    76  	apiextv1Client := apiextv1.NewForConfigOrDie(ctrlConfig)
    77  	crdInstalled, err := isCRDInstalled(apiextv1Client, cattleClustersCRDName)
    78  	if err != nil {
    79  		log.Error(err, "unable to determine if cattle CRD is installed")
    80  		os.Exit(1)
    81  	}
    82  
    83  	// only start the Rancher cluster sync controller if the cattle clusters CRD is installed
    84  	if crdInstalled {
    85  		syncEnabled, clusterSelector, err := shouldSyncClusters(clusterSelectorFilePath)
    86  		if err != nil {
    87  			log.Error(err, "error processing cluster sync config")
    88  			os.Exit(1)
    89  		}
    90  
    91  		if err = (&rancher.RancherClusterReconciler{
    92  			Client:             mgr.GetClient(),
    93  			ClusterSyncEnabled: syncEnabled,
    94  			ClusterSelector:    clusterSelector,
    95  			Log:                log,
    96  			Scheme:             mgr.GetScheme(),
    97  		}).SetupWithManager(mgr); err != nil {
    98  			log.Errorf("Failed to create Rancher cluster controller: %v", err)
    99  			os.Exit(1)
   100  		}
   101  	}
   102  
   103  	capiCrdInstalled, err := isCRDInstalled(apiextv1Client, capiClustersCRDName)
   104  	if err != nil {
   105  		log.Error(err, "unable to determine if CAPI CRD is installed")
   106  		os.Exit(1)
   107  	}
   108  
   109  	if props.IngressHost == "" {
   110  		props.IngressHost = rancherutil.DefaultRancherIngressHostPrefix + nginxutil.IngressNGINXNamespace()
   111  	}
   112  
   113  	// only start the CAPI cluster controller if the clusters CRD is installed and the controller is enabled
   114  	if capiCrdInstalled && !props.DisableCAPIRancherRegistration {
   115  		log.Infof("Starting CAPI Cluster controller")
   116  		if err = (&capi.CAPIClusterReconciler{
   117  			Client:             mgr.GetClient(),
   118  			Log:                log,
   119  			Scheme:             mgr.GetScheme(),
   120  			RancherIngressHost: props.IngressHost,
   121  			RancherEnabled:     crdInstalled,
   122  		}).SetupWithManager(mgr); err != nil {
   123  			log.Errorf("Failed to create CAPI cluster controller: %v", err)
   124  			os.Exit(1)
   125  		}
   126  	}
   127  
   128  	// Set up the reconciler for VerrazzanoManagedCluster objects
   129  	if err = (&vmc.VerrazzanoManagedClusterReconciler{
   130  		Client:             mgr.GetClient(),
   131  		Scheme:             mgr.GetScheme(),
   132  		RancherIngressHost: props.IngressHost,
   133  	}).SetupWithManager(mgr); err != nil {
   134  		log.Error(err, "Failed to setup controller VerrazzanoManagedCluster")
   135  		os.Exit(1)
   136  	}
   137  	if props.EnableQuickCreate {
   138  		if err = (&ociocne.ClusterReconciler{
   139  			Base: &controller.Base{
   140  				Client: mgr.GetClient(),
   141  			},
   142  			Scheme:            mgr.GetScheme(),
   143  			CredentialsLoader: oci.CredentialsLoaderImpl{},
   144  			OCIClientGetter: func(credentials *oci.Credentials) (oci.Client, error) {
   145  				return oci.NewClient(credentials)
   146  			},
   147  		}).SetupWithManager(mgr); err != nil {
   148  			log.Error(err, "Failed to setup controller OCNEOCIQuickCreate")
   149  			os.Exit(1)
   150  		}
   151  		if err = (&oke.ClusterReconciler{
   152  			Base: &controller.Base{
   153  				Client: mgr.GetClient(),
   154  			},
   155  			CredentialsLoader: oci.CredentialsLoaderImpl{},
   156  			OCIClientGetter: func(credentials *oci.Credentials) (oci.Client, error) {
   157  				return oci.NewClient(credentials)
   158  			},
   159  			Scheme: mgr.GetScheme(),
   160  		}).SetupWithManager(mgr); err != nil {
   161  			log.Error(err, "Failed to setup controller OKEQuickCreate")
   162  			os.Exit(1)
   163  		}
   164  	}
   165  
   166  	if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil {
   167  		log.Error(err, "unable to set up health check")
   168  		os.Exit(1)
   169  	}
   170  	if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil {
   171  		log.Error(err, "unable to set up ready check")
   172  		os.Exit(1)
   173  	}
   174  
   175  	// wrap the controller context with a new context so we can cancel the context if we detect
   176  	// a change in the clusters.management.cattle.io CRD installation
   177  	ctx, cancel := context.WithCancel(ctrl.SetupSignalHandler())
   178  	go watchCattleClustersCRD(cancel, apiextv1Client, crdInstalled, log)
   179  
   180  	go startMetricsServer(log)
   181  
   182  	log.Info("starting manager")
   183  	if err := mgr.Start(ctx); err != nil {
   184  		log.Error(err, "problem running manager")
   185  		os.Exit(1)
   186  	}
   187  	return nil
   188  }
   189  
   190  // shouldSyncClusters returns true if Rancher cluster synchronization is enabled. An optional
   191  // user-specified label selector can be used to filter the Rancher clusters. If sync is enabled and
   192  // the label selector is nil, we will sync all Rancher clusters.
   193  func shouldSyncClusters(clusterSelectorFile string) (bool, *metav1.LabelSelector, error) {
   194  	enabled := os.Getenv(syncClustersEnvVarName)
   195  	if enabled == "" || strings.ToLower(enabled) != "true" {
   196  		return false, nil, nil
   197  	}
   198  
   199  	f, err := os.Stat(clusterSelectorFile)
   200  	if err != nil || f.Size() == 0 {
   201  		return true, nil, nil
   202  	}
   203  
   204  	b, err := os.ReadFile(clusterSelectorFile)
   205  	if err != nil {
   206  		return true, nil, err
   207  	}
   208  
   209  	selector := &metav1.LabelSelector{}
   210  	err = yaml.Unmarshal(b, selector)
   211  	if err != nil {
   212  		return true, nil, err
   213  	}
   214  
   215  	return true, selector, err
   216  }
   217  
   218  // isCRDInstalled returns true if the clusters.management.cattle.io CRD is installed
   219  func isCRDInstalled(client apiextv1.ApiextensionsV1Interface, crdName string) (bool, error) {
   220  	_, err := client.CustomResourceDefinitions().Get(context.TODO(), crdName, metav1.GetOptions{})
   221  	if k8serrors.IsNotFound(err) {
   222  		return false, nil
   223  	}
   224  	if err != nil {
   225  		return false, err
   226  	}
   227  
   228  	return true, nil
   229  }
   230  
   231  // watchCattleClustersCRD periodically checks to see if the clusters.management.cattle.io CRD is installed. If it detects a change
   232  // it will call the context cancel function which will cause the operator to gracefully shut down. The operator will then be
   233  // restarted by Kubernetes and it will start the cattle clusters sync controller if the CRD is installed.
   234  func watchCattleClustersCRD(cancel context.CancelFunc, client apiextv1.ApiextensionsV1Interface, crdInstalled bool, log *zap.SugaredLogger) {
   235  	log.Infof("Watching for CRD %s to be installed or uninstalled", cattleClustersCRDName)
   236  	for {
   237  		installed, err := isCRDInstalled(client, cattleClustersCRDName)
   238  		if err != nil {
   239  			log.Debugf("Unable to determine if CRD %s is installed: %v", cattleClustersCRDName, err)
   240  			continue
   241  		}
   242  		if installed != crdInstalled {
   243  			log.Infof("Detected CRD %s was installed or uninstalled, shutting down operator", cattleClustersCRDName)
   244  			cancel()
   245  			return
   246  		}
   247  		time.Sleep(10 * time.Second)
   248  	}
   249  }
   250  
   251  // startMetricsServer initializes the HTTP listener for the metrics server
   252  func startMetricsServer(log *zap.SugaredLogger) {
   253  	// Start up the Prometheus Metrics Exporter server to emit operator metrics
   254  	http.Handle("/metrics", promhttp.Handler())
   255  	server := &http.Server{
   256  		ReadTimeout:  10 * time.Second,
   257  		WriteTimeout: 10 * time.Second,
   258  		Addr:         ":9100",
   259  	}
   260  	for err := server.ListenAndServe(); err != nil; err = server.ListenAndServe() {
   261  		log.Debugf("Failed to start the metrics server on port 9100: %v", err)
   262  		time.Sleep(10 * time.Second)
   263  	}
   264  }