github.com/verrazzano/verrazzano@v1.7.1/cluster-operator/internal/operatorinit/run_operator.go (about) 1 // Copyright (c) 2022, 2023, Oracle and/or its affiliates. 2 // Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl. 3 4 package operatorinit 5 6 import ( 7 "context" 8 "github.com/pkg/errors" 9 "github.com/prometheus/client_golang/prometheus/promhttp" 10 "github.com/verrazzano/verrazzano/cluster-operator/controllers/capi" 11 "github.com/verrazzano/verrazzano/cluster-operator/controllers/quickcreate/controller" 12 "github.com/verrazzano/verrazzano/cluster-operator/controllers/quickcreate/controller/oci" 13 "github.com/verrazzano/verrazzano/cluster-operator/controllers/quickcreate/ociocne" 14 "github.com/verrazzano/verrazzano/cluster-operator/controllers/quickcreate/oke" 15 "github.com/verrazzano/verrazzano/cluster-operator/controllers/rancher" 16 "github.com/verrazzano/verrazzano/cluster-operator/controllers/vmc" 17 "github.com/verrazzano/verrazzano/pkg/k8sutil" 18 "github.com/verrazzano/verrazzano/pkg/log/vzlog" 19 "github.com/verrazzano/verrazzano/pkg/nginxutil" 20 "github.com/verrazzano/verrazzano/pkg/rancherutil" 21 "go.uber.org/zap" 22 apiextv1 "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset/typed/apiextensions/v1" 23 k8serrors "k8s.io/apimachinery/pkg/api/errors" 24 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 25 "k8s.io/apimachinery/pkg/runtime" 26 "net/http" 27 "os" 28 ctrl "sigs.k8s.io/controller-runtime" 29 "sigs.k8s.io/controller-runtime/pkg/healthz" 30 "sigs.k8s.io/yaml" 31 "strings" 32 "time" 33 ) 34 35 const ( 36 clusterSelectorFilePath = "/var/syncClusters/selector.yaml" 37 syncClustersEnvVarName = "CLUSTER_SYNC_ENABLED" 38 cattleClustersCRDName = "clusters.management.cattle.io" 39 capiClustersCRDName = "clusters.cluster.x-k8s.io" 40 ) 41 42 type Properties struct { 43 Scheme *runtime.Scheme 44 CertificateDir string 45 MetricsAddress string 46 ProbeAddress string 47 IngressHost string 48 EnableLeaderElection bool 49 EnableQuickCreate bool 50 DisableCAPIRancherRegistration bool 51 } 52 53 // StartClusterOperator Cluster operator execution entry point 54 func StartClusterOperator(log *zap.SugaredLogger, props Properties) error { 55 options := ctrl.Options{ 56 Scheme: props.Scheme, 57 MetricsBindAddress: props.MetricsAddress, 58 Port: 9443, 59 HealthProbeBindAddress: props.ProbeAddress, 60 LeaderElection: props.EnableLeaderElection, 61 LeaderElectionID: "42d5ea87.verrazzano.io", 62 } 63 64 ingressNGINXNamespace, err := nginxutil.DetermineNamespaceForIngressNGINX(vzlog.DefaultLogger()) 65 if err != nil { 66 return err 67 } 68 nginxutil.SetIngressNGINXNamespace(ingressNGINXNamespace) 69 70 ctrlConfig := k8sutil.GetConfigOrDieFromController() 71 mgr, err := ctrl.NewManager(ctrlConfig, options) 72 if err != nil { 73 return errors.Wrapf(err, "Failed to setup controller manager") 74 } 75 76 apiextv1Client := apiextv1.NewForConfigOrDie(ctrlConfig) 77 crdInstalled, err := isCRDInstalled(apiextv1Client, cattleClustersCRDName) 78 if err != nil { 79 log.Error(err, "unable to determine if cattle CRD is installed") 80 os.Exit(1) 81 } 82 83 // only start the Rancher cluster sync controller if the cattle clusters CRD is installed 84 if crdInstalled { 85 syncEnabled, clusterSelector, err := shouldSyncClusters(clusterSelectorFilePath) 86 if err != nil { 87 log.Error(err, "error processing cluster sync config") 88 os.Exit(1) 89 } 90 91 if err = (&rancher.RancherClusterReconciler{ 92 Client: mgr.GetClient(), 93 ClusterSyncEnabled: syncEnabled, 94 ClusterSelector: clusterSelector, 95 Log: log, 96 Scheme: mgr.GetScheme(), 97 }).SetupWithManager(mgr); err != nil { 98 log.Errorf("Failed to create Rancher cluster controller: %v", err) 99 os.Exit(1) 100 } 101 } 102 103 capiCrdInstalled, err := isCRDInstalled(apiextv1Client, capiClustersCRDName) 104 if err != nil { 105 log.Error(err, "unable to determine if CAPI CRD is installed") 106 os.Exit(1) 107 } 108 109 if props.IngressHost == "" { 110 props.IngressHost = rancherutil.DefaultRancherIngressHostPrefix + nginxutil.IngressNGINXNamespace() 111 } 112 113 // only start the CAPI cluster controller if the clusters CRD is installed and the controller is enabled 114 if capiCrdInstalled && !props.DisableCAPIRancherRegistration { 115 log.Infof("Starting CAPI Cluster controller") 116 if err = (&capi.CAPIClusterReconciler{ 117 Client: mgr.GetClient(), 118 Log: log, 119 Scheme: mgr.GetScheme(), 120 RancherIngressHost: props.IngressHost, 121 RancherEnabled: crdInstalled, 122 }).SetupWithManager(mgr); err != nil { 123 log.Errorf("Failed to create CAPI cluster controller: %v", err) 124 os.Exit(1) 125 } 126 } 127 128 // Set up the reconciler for VerrazzanoManagedCluster objects 129 if err = (&vmc.VerrazzanoManagedClusterReconciler{ 130 Client: mgr.GetClient(), 131 Scheme: mgr.GetScheme(), 132 RancherIngressHost: props.IngressHost, 133 }).SetupWithManager(mgr); err != nil { 134 log.Error(err, "Failed to setup controller VerrazzanoManagedCluster") 135 os.Exit(1) 136 } 137 if props.EnableQuickCreate { 138 if err = (&ociocne.ClusterReconciler{ 139 Base: &controller.Base{ 140 Client: mgr.GetClient(), 141 }, 142 Scheme: mgr.GetScheme(), 143 CredentialsLoader: oci.CredentialsLoaderImpl{}, 144 OCIClientGetter: func(credentials *oci.Credentials) (oci.Client, error) { 145 return oci.NewClient(credentials) 146 }, 147 }).SetupWithManager(mgr); err != nil { 148 log.Error(err, "Failed to setup controller OCNEOCIQuickCreate") 149 os.Exit(1) 150 } 151 if err = (&oke.ClusterReconciler{ 152 Base: &controller.Base{ 153 Client: mgr.GetClient(), 154 }, 155 CredentialsLoader: oci.CredentialsLoaderImpl{}, 156 OCIClientGetter: func(credentials *oci.Credentials) (oci.Client, error) { 157 return oci.NewClient(credentials) 158 }, 159 Scheme: mgr.GetScheme(), 160 }).SetupWithManager(mgr); err != nil { 161 log.Error(err, "Failed to setup controller OKEQuickCreate") 162 os.Exit(1) 163 } 164 } 165 166 if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil { 167 log.Error(err, "unable to set up health check") 168 os.Exit(1) 169 } 170 if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil { 171 log.Error(err, "unable to set up ready check") 172 os.Exit(1) 173 } 174 175 // wrap the controller context with a new context so we can cancel the context if we detect 176 // a change in the clusters.management.cattle.io CRD installation 177 ctx, cancel := context.WithCancel(ctrl.SetupSignalHandler()) 178 go watchCattleClustersCRD(cancel, apiextv1Client, crdInstalled, log) 179 180 go startMetricsServer(log) 181 182 log.Info("starting manager") 183 if err := mgr.Start(ctx); err != nil { 184 log.Error(err, "problem running manager") 185 os.Exit(1) 186 } 187 return nil 188 } 189 190 // shouldSyncClusters returns true if Rancher cluster synchronization is enabled. An optional 191 // user-specified label selector can be used to filter the Rancher clusters. If sync is enabled and 192 // the label selector is nil, we will sync all Rancher clusters. 193 func shouldSyncClusters(clusterSelectorFile string) (bool, *metav1.LabelSelector, error) { 194 enabled := os.Getenv(syncClustersEnvVarName) 195 if enabled == "" || strings.ToLower(enabled) != "true" { 196 return false, nil, nil 197 } 198 199 f, err := os.Stat(clusterSelectorFile) 200 if err != nil || f.Size() == 0 { 201 return true, nil, nil 202 } 203 204 b, err := os.ReadFile(clusterSelectorFile) 205 if err != nil { 206 return true, nil, err 207 } 208 209 selector := &metav1.LabelSelector{} 210 err = yaml.Unmarshal(b, selector) 211 if err != nil { 212 return true, nil, err 213 } 214 215 return true, selector, err 216 } 217 218 // isCRDInstalled returns true if the clusters.management.cattle.io CRD is installed 219 func isCRDInstalled(client apiextv1.ApiextensionsV1Interface, crdName string) (bool, error) { 220 _, err := client.CustomResourceDefinitions().Get(context.TODO(), crdName, metav1.GetOptions{}) 221 if k8serrors.IsNotFound(err) { 222 return false, nil 223 } 224 if err != nil { 225 return false, err 226 } 227 228 return true, nil 229 } 230 231 // watchCattleClustersCRD periodically checks to see if the clusters.management.cattle.io CRD is installed. If it detects a change 232 // it will call the context cancel function which will cause the operator to gracefully shut down. The operator will then be 233 // restarted by Kubernetes and it will start the cattle clusters sync controller if the CRD is installed. 234 func watchCattleClustersCRD(cancel context.CancelFunc, client apiextv1.ApiextensionsV1Interface, crdInstalled bool, log *zap.SugaredLogger) { 235 log.Infof("Watching for CRD %s to be installed or uninstalled", cattleClustersCRDName) 236 for { 237 installed, err := isCRDInstalled(client, cattleClustersCRDName) 238 if err != nil { 239 log.Debugf("Unable to determine if CRD %s is installed: %v", cattleClustersCRDName, err) 240 continue 241 } 242 if installed != crdInstalled { 243 log.Infof("Detected CRD %s was installed or uninstalled, shutting down operator", cattleClustersCRDName) 244 cancel() 245 return 246 } 247 time.Sleep(10 * time.Second) 248 } 249 } 250 251 // startMetricsServer initializes the HTTP listener for the metrics server 252 func startMetricsServer(log *zap.SugaredLogger) { 253 // Start up the Prometheus Metrics Exporter server to emit operator metrics 254 http.Handle("/metrics", promhttp.Handler()) 255 server := &http.Server{ 256 ReadTimeout: 10 * time.Second, 257 WriteTimeout: 10 * time.Second, 258 Addr: ":9100", 259 } 260 for err := server.ListenAndServe(); err != nil; err = server.ListenAndServe() { 261 log.Debugf("Failed to start the metrics server on port 9100: %v", err) 262 time.Sleep(10 * time.Second) 263 } 264 }