github.com/k8snetworkplumbingwg/sriov-network-operator@v1.2.1-0.20240408194816-2d2e5a45d453/controllers/sriovoperatorconfig_controller.go (about)

     1  /*
     2  Copyright 2021.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package controllers
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"os"
    23  	"strings"
    24  
    25  	appsv1 "k8s.io/api/apps/v1"
    26  	corev1 "k8s.io/api/core/v1"
    27  	apierrors "k8s.io/apimachinery/pkg/api/errors"
    28  	uns "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
    29  	"k8s.io/apimachinery/pkg/runtime"
    30  	"k8s.io/apimachinery/pkg/types"
    31  	kscheme "k8s.io/client-go/kubernetes/scheme"
    32  	ctrl "sigs.k8s.io/controller-runtime"
    33  	ctrl_builder "sigs.k8s.io/controller-runtime/pkg/builder"
    34  	"sigs.k8s.io/controller-runtime/pkg/client"
    35  	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
    36  	"sigs.k8s.io/controller-runtime/pkg/log"
    37  	"sigs.k8s.io/controller-runtime/pkg/predicate"
    38  	"sigs.k8s.io/controller-runtime/pkg/reconcile"
    39  
    40  	machinev1 "github.com/openshift/machine-config-operator/pkg/apis/machineconfiguration.openshift.io/v1"
    41  
    42  	sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1"
    43  	apply "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/apply"
    44  	consts "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts"
    45  	"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/featuregate"
    46  	snolog "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/log"
    47  	"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/platforms"
    48  	render "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/render"
    49  	"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars"
    50  )
    51  
    52  // SriovOperatorConfigReconciler reconciles a SriovOperatorConfig object
    53  type SriovOperatorConfigReconciler struct {
    54  	client.Client
    55  	Scheme         *runtime.Scheme
    56  	PlatformHelper platforms.Interface
    57  	FeatureGate    featuregate.FeatureGate
    58  }
    59  
    60  //+kubebuilder:rbac:groups=sriovnetwork.openshift.io,resources=sriovoperatorconfigs,verbs=get;list;watch;create;update;patch;delete
    61  //+kubebuilder:rbac:groups=sriovnetwork.openshift.io,resources=sriovoperatorconfigs/status,verbs=get;update;patch
    62  //+kubebuilder:rbac:groups=sriovnetwork.openshift.io,resources=sriovoperatorconfigs/finalizers,verbs=update
    63  
    64  // Reconcile is part of the main kubernetes reconciliation loop which aims to
    65  // move the current state of the cluster closer to the desired state.
    66  // TODO(user): Modify the Reconcile function to compare the state specified by
    67  // the SriovOperatorConfig object against the actual cluster state, and then
    68  // perform operations to make the cluster state reflect the state specified by
    69  // the user.
    70  //
    71  // For more details, check Reconcile and its Result here:
    72  // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.8.3/pkg/reconcile
    73  func (r *SriovOperatorConfigReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
    74  	logger := log.FromContext(ctx).WithValues("sriovoperatorconfig", req.NamespacedName)
    75  	logger.Info("Reconciling SriovOperatorConfig")
    76  
    77  	// Note: in SetupWithManager we setup manager to enqueue only default config obj
    78  	defaultConfig := &sriovnetworkv1.SriovOperatorConfig{}
    79  	err := r.Get(ctx, req.NamespacedName, defaultConfig)
    80  	if err != nil {
    81  		if apierrors.IsNotFound(err) {
    82  			logger.Info("default SriovOperatorConfig object not found. waiting for creation.")
    83  			return reconcile.Result{}, nil
    84  		}
    85  		// Error reading the object - requeue the request.
    86  		logger.Error(err, "Failed to get default SriovOperatorConfig object")
    87  		return reconcile.Result{}, err
    88  	}
    89  
    90  	snolog.SetLogLevel(defaultConfig.Spec.LogLevel)
    91  
    92  	r.FeatureGate.Init(defaultConfig.Spec.FeatureGates)
    93  	logger.Info("enabled featureGates", "featureGates", r.FeatureGate.String())
    94  
    95  	if !defaultConfig.Spec.EnableInjector {
    96  		logger.Info("SR-IOV Network Resource Injector is disabled.")
    97  	}
    98  
    99  	if !defaultConfig.Spec.EnableOperatorWebhook {
   100  		logger.Info("SR-IOV Network Operator Webhook is disabled.")
   101  	}
   102  
   103  	// Fetch the SriovNetworkNodePolicyList
   104  	policyList := &sriovnetworkv1.SriovNetworkNodePolicyList{}
   105  	err = r.List(ctx, policyList, &client.ListOptions{})
   106  	if err != nil {
   107  		// Error reading the object - requeue the request.
   108  		return reconcile.Result{}, err
   109  	}
   110  
   111  	// Render and sync webhook objects
   112  	if err = r.syncWebhookObjs(ctx, defaultConfig); err != nil {
   113  		return reconcile.Result{}, err
   114  	}
   115  
   116  	// Sync SriovNetworkConfigDaemon objects
   117  	if err = r.syncConfigDaemonSet(ctx, defaultConfig); err != nil {
   118  		return reconcile.Result{}, err
   119  	}
   120  
   121  	if err = syncPluginDaemonObjs(ctx, r.Client, r.Scheme, defaultConfig, policyList); err != nil {
   122  		return reconcile.Result{}, err
   123  	}
   124  
   125  	// For Openshift we need to create the systemd files using a machine config
   126  	if vars.ClusterType == consts.ClusterTypeOpenshift {
   127  		// TODO: add support for hypershift as today there is no MCO on hypershift clusters
   128  		if r.PlatformHelper.IsHypershift() {
   129  			return ctrl.Result{}, fmt.Errorf("systemd mode is not supported on hypershift")
   130  		}
   131  
   132  		if err = r.syncOpenShiftSystemdService(ctx, defaultConfig); err != nil {
   133  			return reconcile.Result{}, err
   134  		}
   135  	}
   136  
   137  	logger.Info("Reconcile SriovOperatorConfig completed successfully")
   138  	return reconcile.Result{RequeueAfter: consts.ResyncPeriod}, nil
   139  }
   140  
   141  // defaultConfigPredicate creates a predicate.Predicate that will return true
   142  // only for the default sriovoperatorconfig obj.
   143  func defaultConfigPredicate() predicate.Predicate {
   144  	return predicate.NewPredicateFuncs(func(object client.Object) bool {
   145  		if object.GetName() == consts.DefaultConfigName && object.GetNamespace() == vars.Namespace {
   146  			return true
   147  		}
   148  		return false
   149  	})
   150  }
   151  
   152  // SetupWithManager sets up the controller with the Manager.
   153  func (r *SriovOperatorConfigReconciler) SetupWithManager(mgr ctrl.Manager) error {
   154  	return ctrl.NewControllerManagedBy(mgr).
   155  		For(&sriovnetworkv1.SriovOperatorConfig{}, ctrl_builder.WithPredicates(defaultConfigPredicate())).
   156  		Owns(&appsv1.DaemonSet{}).
   157  		Owns(&corev1.ConfigMap{}).
   158  		Complete(r)
   159  }
   160  
   161  func (r *SriovOperatorConfigReconciler) syncConfigDaemonSet(ctx context.Context, dc *sriovnetworkv1.SriovOperatorConfig) error {
   162  	logger := log.Log.WithName("syncConfigDaemonset")
   163  	logger.V(1).Info("Start to sync config daemonset")
   164  
   165  	data := render.MakeRenderData()
   166  	data.Data["Image"] = os.Getenv("SRIOV_NETWORK_CONFIG_DAEMON_IMAGE")
   167  	data.Data["Namespace"] = vars.Namespace
   168  	data.Data["SRIOVCNIImage"] = os.Getenv("SRIOV_CNI_IMAGE")
   169  	data.Data["SRIOVInfiniBandCNIImage"] = os.Getenv("SRIOV_INFINIBAND_CNI_IMAGE")
   170  	data.Data["OVSCNIImage"] = os.Getenv("OVS_CNI_IMAGE")
   171  	data.Data["ReleaseVersion"] = os.Getenv("RELEASEVERSION")
   172  	data.Data["ClusterType"] = vars.ClusterType
   173  	data.Data["DevMode"] = os.Getenv("DEV_MODE")
   174  	data.Data["ImagePullSecrets"] = GetImagePullSecrets()
   175  	if dc.Spec.ConfigurationMode == sriovnetworkv1.SystemdConfigurationMode {
   176  		data.Data["UsedSystemdMode"] = true
   177  	} else {
   178  		data.Data["UsedSystemdMode"] = false
   179  	}
   180  	data.Data["ParallelNicConfig"] = r.FeatureGate.IsEnabled(consts.ParallelNicConfigFeatureGate)
   181  
   182  	envCniBinPath := os.Getenv("SRIOV_CNI_BIN_PATH")
   183  	if envCniBinPath == "" {
   184  		data.Data["CNIBinPath"] = "/var/lib/cni/bin"
   185  	} else {
   186  		logger.V(1).Info("New cni bin found", "CNIBinPath", envCniBinPath)
   187  		data.Data["CNIBinPath"] = envCniBinPath
   188  	}
   189  
   190  	if len(dc.Spec.DisablePlugins) > 0 {
   191  		logger.V(1).Info("DisablePlugins provided", "DisablePlugins", dc.Spec.DisablePlugins)
   192  		data.Data["DisablePlugins"] = strings.Join(dc.Spec.DisablePlugins.ToStringSlice(), ",")
   193  	}
   194  
   195  	objs, err := render.RenderDir(consts.ConfigDaemonPath, &data)
   196  	if err != nil {
   197  		logger.Error(err, "Fail to render config daemon manifests")
   198  		return err
   199  	}
   200  	// Sync DaemonSets
   201  	for _, obj := range objs {
   202  		if obj.GetKind() == "DaemonSet" && len(dc.Spec.ConfigDaemonNodeSelector) > 0 {
   203  			scheme := kscheme.Scheme
   204  			ds := &appsv1.DaemonSet{}
   205  			err = scheme.Convert(obj, ds, nil)
   206  			if err != nil {
   207  				logger.Error(err, "Fail to convert to DaemonSet")
   208  				return err
   209  			}
   210  			ds.Spec.Template.Spec.NodeSelector = dc.Spec.ConfigDaemonNodeSelector
   211  			err = scheme.Convert(ds, obj, nil)
   212  			if err != nil {
   213  				logger.Error(err, "Fail to convert to Unstructured")
   214  				return err
   215  			}
   216  		}
   217  		err = r.syncK8sResource(ctx, dc, obj)
   218  		if err != nil {
   219  			logger.Error(err, "Couldn't sync SR-IoV daemons objects")
   220  			return err
   221  		}
   222  	}
   223  	return nil
   224  }
   225  
   226  func (r *SriovOperatorConfigReconciler) syncWebhookObjs(ctx context.Context, dc *sriovnetworkv1.SriovOperatorConfig) error {
   227  	logger := log.Log.WithName("syncWebhookObjs")
   228  	logger.V(1).Info("Start to sync webhook objects")
   229  
   230  	for name, path := range webhooks {
   231  		// Render Webhook manifests
   232  		data := render.MakeRenderData()
   233  		data.Data["Namespace"] = vars.Namespace
   234  		data.Data["SRIOVMutatingWebhookName"] = name
   235  		data.Data["NetworkResourcesInjectorImage"] = os.Getenv("NETWORK_RESOURCES_INJECTOR_IMAGE")
   236  		data.Data["SriovNetworkWebhookImage"] = os.Getenv("SRIOV_NETWORK_WEBHOOK_IMAGE")
   237  		data.Data["ReleaseVersion"] = os.Getenv("RELEASEVERSION")
   238  		data.Data["ClusterType"] = vars.ClusterType
   239  		data.Data["DevMode"] = os.Getenv("DEV_MODE")
   240  		data.Data["ImagePullSecrets"] = GetImagePullSecrets()
   241  		data.Data["CertManagerEnabled"] = strings.ToLower(os.Getenv("ADMISSION_CONTROLLERS_CERTIFICATES_CERT_MANAGER_ENABLED")) == trueString
   242  		data.Data["OperatorWebhookSecretName"] = os.Getenv("ADMISSION_CONTROLLERS_CERTIFICATES_OPERATOR_SECRET_NAME")
   243  		data.Data["OperatorWebhookCA"] = os.Getenv("ADMISSION_CONTROLLERS_CERTIFICATES_OPERATOR_CA_CRT")
   244  		data.Data["InjectorWebhookSecretName"] = os.Getenv("ADMISSION_CONTROLLERS_CERTIFICATES_INJECTOR_SECRET_NAME")
   245  		data.Data["InjectorWebhookCA"] = os.Getenv("ADMISSION_CONTROLLERS_CERTIFICATES_INJECTOR_CA_CRT")
   246  
   247  		data.Data["ExternalControlPlane"] = false
   248  		if r.PlatformHelper.IsOpenshiftCluster() {
   249  			external := r.PlatformHelper.IsHypershift()
   250  			data.Data["ExternalControlPlane"] = external
   251  		}
   252  
   253  		// check for ResourceInjectorMatchConditionFeatureGate feature gate
   254  		data.Data[consts.ResourceInjectorMatchConditionFeatureGate] = r.FeatureGate.IsEnabled(consts.ResourceInjectorMatchConditionFeatureGate)
   255  
   256  		objs, err := render.RenderDir(path, &data)
   257  		if err != nil {
   258  			logger.Error(err, "Fail to render webhook manifests")
   259  			return err
   260  		}
   261  
   262  		// Delete injector webhook
   263  		if !dc.Spec.EnableInjector && path == consts.InjectorWebHookPath {
   264  			for _, obj := range objs {
   265  				err = r.deleteWebhookObject(ctx, obj)
   266  				if err != nil {
   267  					return err
   268  				}
   269  			}
   270  			logger.Info("SR-IOV Admission Controller is disabled.")
   271  			logger.Info("To enable SR-IOV Admission Controller,")
   272  			logger.Info("Set 'SriovOperatorConfig.Spec.EnableInjector' to true(bool).")
   273  			continue
   274  		}
   275  		// Delete operator webhook
   276  		if !dc.Spec.EnableOperatorWebhook && path == consts.OperatorWebHookPath {
   277  			for _, obj := range objs {
   278  				err = r.deleteWebhookObject(ctx, obj)
   279  				if err != nil {
   280  					return err
   281  				}
   282  			}
   283  			logger.Info("Operator Admission Controller is disabled.")
   284  			logger.Info("To enable Operator Admission Controller,")
   285  			logger.Info("Set 'SriovOperatorConfig.Spec.EnableOperatorWebhook' to true(bool).")
   286  			continue
   287  		}
   288  
   289  		// Sync Webhook
   290  		for _, obj := range objs {
   291  			err = r.syncK8sResource(ctx, dc, obj)
   292  			if err != nil {
   293  				logger.Error(err, "Couldn't sync webhook objects")
   294  				return err
   295  			}
   296  		}
   297  	}
   298  
   299  	return nil
   300  }
   301  
   302  func (r *SriovOperatorConfigReconciler) deleteWebhookObject(ctx context.Context, obj *uns.Unstructured) error {
   303  	if err := r.deleteK8sResource(ctx, obj); err != nil {
   304  		return err
   305  	}
   306  	return nil
   307  }
   308  
   309  func (r *SriovOperatorConfigReconciler) deleteK8sResource(ctx context.Context, in *uns.Unstructured) error {
   310  	if err := apply.DeleteObject(ctx, r.Client, in); err != nil {
   311  		return fmt.Errorf("failed to delete object %v with err: %v", in, err)
   312  	}
   313  	return nil
   314  }
   315  
   316  func (r *SriovOperatorConfigReconciler) syncK8sResource(ctx context.Context, cr *sriovnetworkv1.SriovOperatorConfig, in *uns.Unstructured) error {
   317  	switch in.GetKind() {
   318  	case clusterRoleResourceName, clusterRoleBindingResourceName, mutatingWebhookConfigurationCRDName, validatingWebhookConfigurationCRDName, machineConfigCRDName:
   319  	default:
   320  		// set owner-reference only for namespaced objects
   321  		if err := controllerutil.SetControllerReference(cr, in, r.Scheme); err != nil {
   322  			return err
   323  		}
   324  	}
   325  	if err := apply.ApplyObject(ctx, r.Client, in); err != nil {
   326  		return fmt.Errorf("failed to apply object %v with err: %v", in, err)
   327  	}
   328  	return nil
   329  }
   330  
   331  // syncOpenShiftSystemdService creates the Machine Config to deploy the systemd service on openshift ONLY
   332  func (r *SriovOperatorConfigReconciler) syncOpenShiftSystemdService(ctx context.Context, cr *sriovnetworkv1.SriovOperatorConfig) error {
   333  	logger := log.Log.WithName("syncSystemdService")
   334  
   335  	if cr.Spec.ConfigurationMode != sriovnetworkv1.SystemdConfigurationMode {
   336  		obj := &machinev1.MachineConfig{}
   337  		err := r.Get(context.TODO(), types.NamespacedName{Name: consts.SystemdServiceOcpMachineConfigName}, obj)
   338  		if err != nil {
   339  			if apierrors.IsNotFound(err) {
   340  				return nil
   341  			}
   342  
   343  			logger.Error(err, "failed to get machine config for the sriov-systemd-service")
   344  			return err
   345  		}
   346  
   347  		logger.Info("Systemd service was deployed but the operator is now operating on daemonset mode, removing the machine config")
   348  		err = r.Delete(context.TODO(), obj)
   349  		if err != nil {
   350  			logger.Error(err, "failed to remove the systemd service machine config")
   351  			return err
   352  		}
   353  
   354  		return nil
   355  	}
   356  
   357  	logger.Info("Start to sync config systemd machine config for openshift")
   358  	data := render.MakeRenderData()
   359  	data.Data["LogLevel"] = cr.Spec.LogLevel
   360  	objs, err := render.RenderDir(consts.SystemdServiceOcpPath, &data)
   361  	if err != nil {
   362  		logger.Error(err, "Fail to render config daemon manifests")
   363  		return err
   364  	}
   365  
   366  	// Sync machine config
   367  	return r.setLabelInsideObject(ctx, cr, objs)
   368  }
   369  
   370  func (r SriovOperatorConfigReconciler) setLabelInsideObject(ctx context.Context, cr *sriovnetworkv1.SriovOperatorConfig, objs []*uns.Unstructured) error {
   371  	logger := log.Log.WithName("setLabelInsideObject")
   372  	for _, obj := range objs {
   373  		if obj.GetKind() == machineConfigCRDName && len(cr.Spec.ConfigDaemonNodeSelector) > 0 {
   374  			scheme := kscheme.Scheme
   375  			mc := &machinev1.ControllerConfig{}
   376  			err := scheme.Convert(obj, mc, nil)
   377  			if err != nil {
   378  				logger.Error(err, "Fail to convert to MachineConfig")
   379  				return err
   380  			}
   381  			mc.Labels = cr.Spec.ConfigDaemonNodeSelector
   382  			err = scheme.Convert(mc, obj, nil)
   383  			if err != nil {
   384  				logger.Error(err, "Fail to convert to Unstructured")
   385  				return err
   386  			}
   387  		}
   388  		err := r.syncK8sResource(ctx, cr, obj)
   389  		if err != nil {
   390  			logger.Error(err, "Couldn't sync SR-IoV daemons objects")
   391  			return err
   392  		}
   393  	}
   394  
   395  	return nil
   396  }