sigs.k8s.io/cluster-api@v1.7.1/exp/internal/controllers/machinepool_controller.go

/*
Copyright 2020 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package controllers

import (
	"context"
	"fmt"
	"time"

	"github.com/pkg/errors"
	corev1 "k8s.io/api/core/v1"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
	kerrors "k8s.io/apimachinery/pkg/util/errors"
	"k8s.io/client-go/tools/record"
	"k8s.io/klog/v2"
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/builder"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/controller"
	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
	"sigs.k8s.io/controller-runtime/pkg/handler"
	"sigs.k8s.io/controller-runtime/pkg/reconcile"

	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
	"sigs.k8s.io/cluster-api/api/v1beta1/index"
	"sigs.k8s.io/cluster-api/controllers/external"
	"sigs.k8s.io/cluster-api/controllers/remote"
	expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
	"sigs.k8s.io/cluster-api/internal/util/ssa"
	"sigs.k8s.io/cluster-api/util"
	"sigs.k8s.io/cluster-api/util/annotations"
	"sigs.k8s.io/cluster-api/util/conditions"
	"sigs.k8s.io/cluster-api/util/patch"
	"sigs.k8s.io/cluster-api/util/predicates"
)

// +kubebuilder:rbac:groups=core,resources=events,verbs=get;list;watch;create;patch
// +kubebuilder:rbac:groups=core,resources=secrets,verbs=get;list;watch
// +kubebuilder:rbac:groups=core,resources=nodes,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io;bootstrap.cluster.x-k8s.io,resources=*,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinepools;machinepools/status;machinepools/finalizers,verbs=get;list;watch;create;update;patch;delete

var (
	// machinePoolKind contains the schema.GroupVersionKind for the MachinePool type.
	machinePoolKind = clusterv1.GroupVersion.WithKind("MachinePool")
)

const (
	// MachinePoolControllerName defines the controller used when creating clients.
	MachinePoolControllerName = "machinepool-controller"
)

// MachinePoolReconciler reconciles a MachinePool object.
type MachinePoolReconciler struct {
	Client    client.Client
	APIReader client.Reader
	Tracker   *remote.ClusterCacheTracker

	// WatchFilterValue is the label value used to filter events prior to reconciliation.
	WatchFilterValue string

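	// The fields below are set by SetupWithManager and are only used internally by the controller.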
	controller      controller.Controller
	ssaCache        ssa.Cache
	recorder        record.EventRecorder
	externalTracker external.ObjectTracker
}

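// SetupWithManager sets up the MachinePool controller with the Manager: it watches MachinePool
// objects directly, maps Cluster events back to the MachinePools in that Cluster, and initializes
// the event recorder, external object tracker, and SSA cache used during reconciliation.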
func (r *MachinePoolReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options controller.Options) error {
	clusterToMachinePools, err := util.ClusterToTypedObjectsMapper(mgr.GetClient(), &expv1.MachinePoolList{}, mgr.GetScheme())
	if err != nil {
		return err
	}

	c, err := ctrl.NewControllerManagedBy(mgr).
		For(&expv1.MachinePool{}).
		WithOptions(options).
		WithEventFilter(predicates.ResourceNotPausedAndHasFilterLabel(ctrl.LoggerFrom(ctx), r.WatchFilterValue)).
		Watches(
			&clusterv1.Cluster{},
			handler.EnqueueRequestsFromMapFunc(clusterToMachinePools),
			// TODO: should this wait for Cluster.Status.InfrastructureReady similar to Infra Machine resources?
			builder.WithPredicates(
				predicates.All(ctrl.LoggerFrom(ctx),
					predicates.ClusterUnpaused(ctrl.LoggerFrom(ctx)),
					predicates.ResourceHasFilterLabel(ctrl.LoggerFrom(ctx), r.WatchFilterValue),
				),
			),
		).
		Build(r)
	if err != nil {
		return errors.Wrap(err, "failed setting up with a controller manager")
	}

	r.controller = c
	r.recorder = mgr.GetEventRecorderFor("machinepool-controller")
	r.externalTracker = external.ObjectTracker{
		Controller: c,
		Cache:      mgr.GetCache(),
	}
	r.ssaCache = ssa.NewCache()

	return nil
}

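// Reconcile fetches the MachinePool and its owning Cluster, returns early when either is paused,
// and sets up a patch helper so that phase, conditions, and status are always patched back on exit.
// It then ensures the finalizer is present and hands off to reconcileDelete or reconcile depending
// on whether a deletionTimestamp is set.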
func (r *MachinePoolReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Result, reterr error) {
	log := ctrl.LoggerFrom(ctx)

	mp := &expv1.MachinePool{}
	if err := r.Client.Get(ctx, req.NamespacedName, mp); err != nil {
		if apierrors.IsNotFound(err) {
			// Object not found, return. Created objects are automatically garbage collected.
			// For additional cleanup logic use finalizers.
			return ctrl.Result{}, nil
		}
		log.Error(err, "Error reading the object - requeue the request.")
		return ctrl.Result{}, err
	}

	log = log.WithValues("Cluster", klog.KRef(mp.ObjectMeta.Namespace, mp.Spec.ClusterName))
	ctx = ctrl.LoggerInto(ctx, log)

	cluster, err := util.GetClusterByName(ctx, r.Client, mp.ObjectMeta.Namespace, mp.Spec.ClusterName)
	if err != nil {
		log.Error(err, "Failed to get Cluster for MachinePool.", "MachinePool", klog.KObj(mp), "Cluster", klog.KRef(mp.ObjectMeta.Namespace, mp.Spec.ClusterName))
		return ctrl.Result{}, errors.Wrapf(err, "failed to get cluster %q for machinepool %q in namespace %q",
			mp.Spec.ClusterName, mp.Name, mp.Namespace)
	}

	// Return early if the object or Cluster is paused.
	if annotations.IsPaused(cluster, mp) {
		log.Info("Reconciliation is paused for this object")
		return ctrl.Result{}, nil
	}

	// Initialize the patch helper.
	patchHelper, err := patch.NewHelper(mp, r.Client)
	if err != nil {
		return ctrl.Result{}, err
	}

	defer func() {
		r.reconcilePhase(mp)
		// TODO(jpang): add support for metrics.

		// Always update the readyCondition with the summary of the MachinePool conditions.
		conditions.SetSummary(mp,
			conditions.WithConditions(
				clusterv1.BootstrapReadyCondition,
				clusterv1.InfrastructureReadyCondition,
				expv1.ReplicasReadyCondition,
			),
		)

		// Always attempt to patch the object and status after each reconciliation.
		// Patch ObservedGeneration only if the reconciliation completed successfully.
		patchOpts := []patch.Option{
			patch.WithOwnedConditions{Conditions: []clusterv1.ConditionType{
				clusterv1.ReadyCondition,
				clusterv1.BootstrapReadyCondition,
				clusterv1.InfrastructureReadyCondition,
				expv1.ReplicasReadyCondition,
			}},
		}
		if reterr == nil {
			patchOpts = append(patchOpts, patch.WithStatusObservedGeneration{})
		}
		if err := patchHelper.Patch(ctx, mp, patchOpts...); err != nil {
			reterr = kerrors.NewAggregate([]error{reterr, err})
		}
	}()

	// Reconcile labels.
	if mp.Labels == nil {
		mp.Labels = make(map[string]string)
	}
	mp.Labels[clusterv1.ClusterNameLabel] = mp.Spec.ClusterName

	// Handle deletion reconciliation loop.
	if !mp.ObjectMeta.DeletionTimestamp.IsZero() {
		err := r.reconcileDelete(ctx, cluster, mp)
		// Requeue if the reconcile failed because the ClusterCacheTracker was locked for
		// the current cluster because of concurrent access.
		if errors.Is(err, remote.ErrClusterLocked) {
			log.V(5).Info("Requeuing because another worker has the lock on the ClusterCacheTracker")
			return ctrl.Result{RequeueAfter: time.Minute}, nil
		}
		return ctrl.Result{}, err
	}

	// Add the finalizer first if it is not set, to avoid a race condition between init and delete.
	// Note: Finalizers in general can only be added when the deletionTimestamp is not set.
	if !controllerutil.ContainsFinalizer(mp, expv1.MachinePoolFinalizer) {
		controllerutil.AddFinalizer(mp, expv1.MachinePoolFinalizer)
		return ctrl.Result{}, nil
	}

	// Handle normal reconciliation loop.
	res, err := r.reconcile(ctx, cluster, mp)
	// Requeue if the reconcile failed because the ClusterCacheTracker was locked for
	// the current cluster because of concurrent access.
	if errors.Is(err, remote.ErrClusterLocked) {
		log.V(5).Info("Requeuing because another worker has the lock on the ClusterCacheTracker")
		return ctrl.Result{RequeueAfter: time.Minute}, nil
	}
	return res, err
}

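// reconcile ensures the MachinePool carries an owner reference to its Cluster and then runs the
// bootstrap, infrastructure, and node reference phases in order, aggregating any errors and
// returning the lowest non-zero requeue result reported by the phases.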
func (r *MachinePoolReconciler) reconcile(ctx context.Context, cluster *clusterv1.Cluster, mp *expv1.MachinePool) (ctrl.Result, error) {
	// Ensure the MachinePool is owned by the Cluster it belongs to.
	mp.SetOwnerReferences(util.EnsureOwnerRef(mp.GetOwnerReferences(), metav1.OwnerReference{
		APIVersion: clusterv1.GroupVersion.String(),
		Kind:       "Cluster",
		Name:       cluster.Name,
		UID:        cluster.UID,
	}))

	phases := []func(context.Context, *clusterv1.Cluster, *expv1.MachinePool) (ctrl.Result, error){
		r.reconcileBootstrap,
		r.reconcileInfrastructure,
		r.reconcileNodeRefs,
	}

	res := ctrl.Result{}
	errs := []error{}
	for _, phase := range phases {
		// Call the inner reconciliation methods.
		phaseResult, err := phase(ctx, cluster, mp)
		if err != nil {
			errs = append(errs, err)
		}
		if len(errs) > 0 {
			continue
		}

		res = util.LowestNonZeroResult(res, phaseResult)
	}
	return res, kerrors.NewAggregate(errs)
}

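// reconcileDelete deletes the referenced bootstrap and infrastructure objects, cleans up the Nodes
// in the workload cluster, and removes the finalizer only once both steps have completed.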
func (r *MachinePoolReconciler) reconcileDelete(ctx context.Context, cluster *clusterv1.Cluster, mp *expv1.MachinePool) error {
	if ok, err := r.reconcileDeleteExternal(ctx, mp); !ok || err != nil {
		// Return early and don't remove the finalizer if we got an error or
		// the external reconciliation deletion isn't ready.
		return err
	}

	if err := r.reconcileDeleteNodes(ctx, cluster, mp); err != nil {
		// Return early and don't remove the finalizer if we got an error.
		return err
	}

	controllerutil.RemoveFinalizer(mp, expv1.MachinePoolFinalizer)
	return nil
}

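// reconcileDeleteNodes resolves a client for the workload cluster through the ClusterCacheTracker
// and hands the MachinePool's node references and provider ID list to deleteRetiredNodes for cleanup.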
func (r *MachinePoolReconciler) reconcileDeleteNodes(ctx context.Context, cluster *clusterv1.Cluster, machinepool *expv1.MachinePool) error {
	if len(machinepool.Status.NodeRefs) == 0 {
		return nil
	}

	clusterClient, err := r.Tracker.GetClient(ctx, util.ObjectKey(cluster))
	if err != nil {
		return err
	}

	return r.deleteRetiredNodes(ctx, clusterClient, machinepool.Status.NodeRefs, machinepool.Spec.ProviderIDList)
}

// reconcileDeleteExternal tries to delete external references, returning true if it cannot find any.
func (r *MachinePoolReconciler) reconcileDeleteExternal(ctx context.Context, m *expv1.MachinePool) (bool, error) {
	objects := []*unstructured.Unstructured{}
	references := []*corev1.ObjectReference{
		m.Spec.Template.Spec.Bootstrap.ConfigRef,
		&m.Spec.Template.Spec.InfrastructureRef,
	}

	// Loop over the references and try to retrieve each one with the client.
	for _, ref := range references {
		if ref == nil {
			continue
		}

		obj, err := external.Get(ctx, r.Client, ref, m.Namespace)
		if err != nil && !apierrors.IsNotFound(errors.Cause(err)) {
			return false, errors.Wrapf(err, "failed to get %s %q for MachinePool %q in namespace %q",
				ref.GroupVersionKind(), ref.Name, m.Name, m.Namespace)
		}
		if obj != nil {
			objects = append(objects, obj)
		}
	}

	// Issue a delete request for any object that has been found.
	for _, obj := range objects {
		if err := r.Client.Delete(ctx, obj); err != nil && !apierrors.IsNotFound(err) {
			return false, errors.Wrapf(err,
				"failed to delete %v %q for MachinePool %q in namespace %q",
				obj.GroupVersionKind(), obj.GetName(), m.Name, m.Namespace)
		}
	}

	// Return true if there are no more external objects.
	return len(objects) == 0, nil
}

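// watchClusterNodes starts a remote watch on Node objects in the workload cluster once the control
// plane has been initialized, mapping Node events back to MachinePools via nodeToMachinePool.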
func (r *MachinePoolReconciler) watchClusterNodes(ctx context.Context, cluster *clusterv1.Cluster) error {
	log := ctrl.LoggerFrom(ctx)

	if !conditions.IsTrue(cluster, clusterv1.ControlPlaneInitializedCondition) {
		log.V(5).Info("Skipping node watching setup because control plane is not initialized")
		return nil
	}

	// If there is no tracker, don't watch remote nodes.
	if r.Tracker == nil {
		return nil
	}

	return r.Tracker.Watch(ctx, remote.WatchInput{
		Name:         "machinepool-watchNodes",
		Cluster:      util.ObjectKey(cluster),
		Watcher:      r.controller,
		Kind:         &corev1.Node{},
		EventHandler: handler.EnqueueRequestsFromMapFunc(r.nodeToMachinePool),
	})
}

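// nodeToMachinePool maps a workload cluster Node to a reconcile request for the MachinePool that
// owns it: it narrows the lookup with the cluster name and namespace annotations when present,
// matches on the node name index first, and falls back to the provider ID index.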
func (r *MachinePoolReconciler) nodeToMachinePool(ctx context.Context, o client.Object) []reconcile.Request {
	node, ok := o.(*corev1.Node)
	if !ok {
		panic(fmt.Sprintf("Expected a Node but got a %T", o))
	}

	var filters []client.ListOption
	// Match by clusterName when the node has the annotation.
	if clusterName, ok := node.GetAnnotations()[clusterv1.ClusterNameAnnotation]; ok {
		filters = append(filters, client.MatchingLabels{
			clusterv1.ClusterNameLabel: clusterName,
		})
	}

	// Match by namespace when the node has the annotation.
	if namespace, ok := node.GetAnnotations()[clusterv1.ClusterNamespaceAnnotation]; ok {
		filters = append(filters, client.InNamespace(namespace))
	}

	// Match by nodeName and status.nodeRef.name.
	machinePoolList := &expv1.MachinePoolList{}
	if err := r.Client.List(
		ctx,
		machinePoolList,
		append(filters, client.MatchingFields{index.MachinePoolNodeNameField: node.Name})...); err != nil {
		return nil
	}

	// There should be exactly 1 MachinePool for the node.
	if len(machinePoolList.Items) == 1 {
		return []reconcile.Request{{NamespacedName: util.ObjectKey(&machinePoolList.Items[0])}}
	}

	// Otherwise, match by providerID. This is useful when e.g. the NodeRef has not been set yet.
	if node.Spec.ProviderID == "" {
		return nil
	}
	machinePoolList = &expv1.MachinePoolList{}
	if err := r.Client.List(
		ctx,
		machinePoolList,
		append(filters, client.MatchingFields{index.MachinePoolProviderIDField: node.Spec.ProviderID})...); err != nil {
		return nil
	}

	// There should be exactly 1 MachinePool for the node.
	if len(machinePoolList.Items) == 1 {
		return []reconcile.Request{{NamespacedName: util.ObjectKey(&machinePoolList.Items[0])}}
	}

	return nil
}