sigs.k8s.io/cluster-api@v1.7.1/internal/controllers/machine/machine_controller_phases.go (about)

     1  /*
     2  Copyright 2019 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package machine
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"time"
    23  
    24  	"github.com/pkg/errors"
    25  	corev1 "k8s.io/api/core/v1"
    26  	apierrors "k8s.io/apimachinery/pkg/api/errors"
    27  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    28  	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
    29  	"k8s.io/apimachinery/pkg/runtime/schema"
    30  	"k8s.io/klog/v2"
    31  	"k8s.io/utils/ptr"
    32  	ctrl "sigs.k8s.io/controller-runtime"
    33  	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
    34  	"sigs.k8s.io/controller-runtime/pkg/handler"
    35  
    36  	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
    37  	"sigs.k8s.io/cluster-api/controllers/external"
    38  	capierrors "sigs.k8s.io/cluster-api/errors"
    39  	"sigs.k8s.io/cluster-api/util"
    40  	"sigs.k8s.io/cluster-api/util/annotations"
    41  	"sigs.k8s.io/cluster-api/util/conditions"
    42  	utilconversion "sigs.k8s.io/cluster-api/util/conversion"
    43  	"sigs.k8s.io/cluster-api/util/patch"
    44  )
    45  
// externalReadyWait is the delay after which reconciliation is requeued when an
// external object referenced by a Machine cannot be found.
var externalReadyWait = 30 * time.Second
    47  
    48  func (r *Reconciler) reconcilePhase(_ context.Context, m *clusterv1.Machine) {
    49  	originalPhase := m.Status.Phase
    50  
    51  	// Set the phase to "pending" if nil.
    52  	if m.Status.Phase == "" {
    53  		m.Status.SetTypedPhase(clusterv1.MachinePhasePending)
    54  	}
    55  
    56  	// Set the phase to "provisioning" if bootstrap is ready and the infrastructure isn't.
    57  	if m.Status.BootstrapReady && !m.Status.InfrastructureReady {
    58  		m.Status.SetTypedPhase(clusterv1.MachinePhaseProvisioning)
    59  	}
    60  
    61  	// Set the phase to "provisioned" if there is a provider ID.
    62  	if m.Spec.ProviderID != nil {
    63  		m.Status.SetTypedPhase(clusterv1.MachinePhaseProvisioned)
    64  	}
    65  
    66  	// Set the phase to "running" if there is a NodeRef field and infrastructure is ready.
    67  	if m.Status.NodeRef != nil && m.Status.InfrastructureReady {
    68  		m.Status.SetTypedPhase(clusterv1.MachinePhaseRunning)
    69  	}
    70  
    71  	// Set the phase to "failed" if any of Status.FailureReason or Status.FailureMessage is not-nil.
    72  	if m.Status.FailureReason != nil || m.Status.FailureMessage != nil {
    73  		m.Status.SetTypedPhase(clusterv1.MachinePhaseFailed)
    74  	}
    75  
    76  	// Set the phase to "deleting" if the deletion timestamp is set.
    77  	if !m.DeletionTimestamp.IsZero() {
    78  		m.Status.SetTypedPhase(clusterv1.MachinePhaseDeleting)
    79  	}
    80  
    81  	// If the phase has changed, update the LastUpdated timestamp
    82  	if m.Status.Phase != originalPhase {
    83  		now := metav1.Now()
    84  		m.Status.LastUpdated = &now
    85  	}
    86  }
    87  
    88  // reconcileExternal handles generic unstructured objects referenced by a Machine.
    89  func (r *Reconciler) reconcileExternal(ctx context.Context, cluster *clusterv1.Cluster, m *clusterv1.Machine, ref *corev1.ObjectReference) (external.ReconcileOutput, error) {
    90  	if err := utilconversion.UpdateReferenceAPIContract(ctx, r.Client, ref); err != nil {
    91  		return external.ReconcileOutput{}, err
    92  	}
    93  
    94  	result, err := r.ensureExternalOwnershipAndWatch(ctx, cluster, m, ref)
    95  	if err != nil {
    96  		return external.ReconcileOutput{}, err
    97  	}
    98  	if result.RequeueAfter > 0 || result.Paused {
    99  		return result, nil
   100  	}
   101  
   102  	obj := result.Result
   103  
   104  	// Set failure reason and message, if any.
   105  	failureReason, failureMessage, err := external.FailuresFrom(obj)
   106  	if err != nil {
   107  		return external.ReconcileOutput{}, err
   108  	}
   109  	if failureReason != "" {
   110  		machineStatusError := capierrors.MachineStatusError(failureReason)
   111  		m.Status.FailureReason = &machineStatusError
   112  	}
   113  	if failureMessage != "" {
   114  		m.Status.FailureMessage = ptr.To(
   115  			fmt.Sprintf("Failure detected from referenced resource %v with name %q: %s",
   116  				obj.GroupVersionKind(), obj.GetName(), failureMessage),
   117  		)
   118  	}
   119  
   120  	return external.ReconcileOutput{Result: obj}, nil
   121  }
   122  
   123  // ensureExternalOwnershipAndWatch ensures that only the Machine owns the external object,
   124  // adds a watch to the external object if one does not already exist and adds the necessary labels.
   125  func (r *Reconciler) ensureExternalOwnershipAndWatch(ctx context.Context, cluster *clusterv1.Cluster, m *clusterv1.Machine, ref *corev1.ObjectReference) (external.ReconcileOutput, error) {
   126  	log := ctrl.LoggerFrom(ctx)
   127  
   128  	obj, err := external.Get(ctx, r.UnstructuredCachingClient, ref, m.Namespace)
   129  	if err != nil {
   130  		if apierrors.IsNotFound(errors.Cause(err)) {
   131  			log.Info("could not find external ref, requeuing", ref.Kind, klog.KRef(ref.Namespace, ref.Name))
   132  			return external.ReconcileOutput{RequeueAfter: externalReadyWait}, nil
   133  		}
   134  		return external.ReconcileOutput{}, err
   135  	}
   136  
   137  	// Ensure we add a watch to the external object, if there isn't one already.
   138  	if err := r.externalTracker.Watch(log, obj, handler.EnqueueRequestForOwner(r.Client.Scheme(), r.Client.RESTMapper(), &clusterv1.Machine{})); err != nil {
   139  		return external.ReconcileOutput{}, err
   140  	}
   141  
   142  	// if external ref is paused, return error.
   143  	if annotations.IsPaused(cluster, obj) {
   144  		log.V(3).Info("External object referenced is paused")
   145  		return external.ReconcileOutput{Paused: true}, nil
   146  	}
   147  
   148  	// Initialize the patch helper.
   149  	patchHelper, err := patch.NewHelper(obj, r.Client)
   150  	if err != nil {
   151  		return external.ReconcileOutput{}, err
   152  	}
   153  
   154  	// removeOnCreateOwnerRefs removes MachineSet and control plane owners from the objects referred to by a Machine.
   155  	// These owner references are added initially because Machines don't exist when those objects are created.
   156  	// At this point the Machine exists and can be set as the controller reference.
   157  	if err := removeOnCreateOwnerRefs(cluster, m, obj); err != nil {
   158  		return external.ReconcileOutput{}, err
   159  	}
   160  
   161  	// Set external object ControllerReference to the Machine.
   162  	if err := controllerutil.SetControllerReference(m, obj, r.Client.Scheme()); err != nil {
   163  		return external.ReconcileOutput{}, err
   164  	}
   165  
   166  	// Set the Cluster label.
   167  	labels := obj.GetLabels()
   168  	if labels == nil {
   169  		labels = make(map[string]string)
   170  	}
   171  	labels[clusterv1.ClusterNameLabel] = m.Spec.ClusterName
   172  	obj.SetLabels(labels)
   173  
   174  	// Always attempt to Patch the external object.
   175  	if err := patchHelper.Patch(ctx, obj); err != nil {
   176  		return external.ReconcileOutput{}, err
   177  	}
   178  
   179  	return external.ReconcileOutput{Result: obj}, nil
   180  }
   181  
   182  // reconcileBootstrap reconciles the Spec.Bootstrap.ConfigRef object on a Machine.
   183  func (r *Reconciler) reconcileBootstrap(ctx context.Context, s *scope) (ctrl.Result, error) {
   184  	log := ctrl.LoggerFrom(ctx)
   185  	cluster := s.cluster
   186  	m := s.machine
   187  
   188  	// If the Bootstrap ref is nil (and so the machine should use user generated data secret), return.
   189  	if m.Spec.Bootstrap.ConfigRef == nil {
   190  		return ctrl.Result{}, nil
   191  	}
   192  
   193  	// Call generic external reconciler if we have an external reference.
   194  	externalResult, err := r.reconcileExternal(ctx, cluster, m, m.Spec.Bootstrap.ConfigRef)
   195  	if err != nil {
   196  		return ctrl.Result{}, err
   197  	}
   198  	s.bootstrapConfig = externalResult.Result
   199  
   200  	// If the external object is paused return.
   201  	if externalResult.Paused {
   202  		return ctrl.Result{}, nil
   203  	}
   204  
   205  	if externalResult.RequeueAfter > 0 {
   206  		return ctrl.Result{RequeueAfter: externalResult.RequeueAfter}, nil
   207  	}
   208  
   209  	// If the bootstrap data is populated, set ready and return.
   210  	if m.Spec.Bootstrap.DataSecretName != nil {
   211  		m.Status.BootstrapReady = true
   212  		conditions.MarkTrue(m, clusterv1.BootstrapReadyCondition)
   213  		return ctrl.Result{}, nil
   214  	}
   215  	bootstrapConfig := externalResult.Result
   216  
   217  	// If the bootstrap config is being deleted, return early.
   218  	if !bootstrapConfig.GetDeletionTimestamp().IsZero() {
   219  		return ctrl.Result{}, nil
   220  	}
   221  
   222  	// Determine if the bootstrap provider is ready.
   223  	ready, err := external.IsReady(bootstrapConfig)
   224  	if err != nil {
   225  		return ctrl.Result{}, err
   226  	}
   227  
   228  	// Report a summary of current status of the bootstrap object defined for this machine.
   229  	conditions.SetMirror(m, clusterv1.BootstrapReadyCondition,
   230  		conditions.UnstructuredGetter(bootstrapConfig),
   231  		conditions.WithFallbackValue(ready, clusterv1.WaitingForDataSecretFallbackReason, clusterv1.ConditionSeverityInfo, ""),
   232  	)
   233  
   234  	// If the bootstrap provider is not ready, requeue.
   235  	if !ready {
   236  		log.Info("Waiting for bootstrap provider to generate data secret and report status.ready", bootstrapConfig.GetKind(), klog.KObj(bootstrapConfig))
   237  		return ctrl.Result{}, nil
   238  	}
   239  
   240  	// Get and set the name of the secret containing the bootstrap data.
   241  	secretName, _, err := unstructured.NestedString(bootstrapConfig.Object, "status", "dataSecretName")
   242  	if err != nil {
   243  		return ctrl.Result{}, errors.Wrapf(err, "failed to retrieve dataSecretName from bootstrap provider for Machine %q in namespace %q", m.Name, m.Namespace)
   244  	} else if secretName == "" {
   245  		return ctrl.Result{}, errors.Errorf("retrieved empty dataSecretName from bootstrap provider for Machine %q in namespace %q", m.Name, m.Namespace)
   246  	}
   247  	m.Spec.Bootstrap.DataSecretName = ptr.To(secretName)
   248  	if !m.Status.BootstrapReady {
   249  		log.Info("Bootstrap provider generated data secret and reports status.ready", bootstrapConfig.GetKind(), klog.KObj(bootstrapConfig), "Secret", klog.KRef(m.Namespace, secretName))
   250  	}
   251  	m.Status.BootstrapReady = true
   252  	return ctrl.Result{}, nil
   253  }
   254  
   255  // reconcileInfrastructure reconciles the Spec.InfrastructureRef object on a Machine.
   256  func (r *Reconciler) reconcileInfrastructure(ctx context.Context, s *scope) (ctrl.Result, error) {
   257  	log := ctrl.LoggerFrom(ctx)
   258  	cluster := s.cluster
   259  	m := s.machine
   260  
   261  	// Call generic external reconciler.
   262  	infraReconcileResult, err := r.reconcileExternal(ctx, cluster, m, &m.Spec.InfrastructureRef)
   263  	if err != nil {
   264  		return ctrl.Result{}, err
   265  	}
   266  	s.infraMachine = infraReconcileResult.Result
   267  	if infraReconcileResult.RequeueAfter > 0 {
   268  		// Infra object went missing after the machine was up and running
   269  		if m.Status.InfrastructureReady {
   270  			log.Error(err, "Machine infrastructure reference has been deleted after being ready, setting failure state")
   271  			m.Status.FailureReason = ptr.To(capierrors.InvalidConfigurationMachineError)
   272  			m.Status.FailureMessage = ptr.To(fmt.Sprintf("Machine infrastructure resource %v with name %q has been deleted after being ready",
   273  				m.Spec.InfrastructureRef.GroupVersionKind(), m.Spec.InfrastructureRef.Name))
   274  			return ctrl.Result{}, errors.Errorf("could not find %v %q for Machine %q in namespace %q, requeuing", m.Spec.InfrastructureRef.GroupVersionKind().String(), m.Spec.InfrastructureRef.Name, m.Name, m.Namespace)
   275  		}
   276  		return ctrl.Result{RequeueAfter: infraReconcileResult.RequeueAfter}, nil
   277  	}
   278  	// if the external object is paused, return without any further processing
   279  	if infraReconcileResult.Paused {
   280  		return ctrl.Result{}, nil
   281  	}
   282  	infraConfig := infraReconcileResult.Result
   283  
   284  	if !infraConfig.GetDeletionTimestamp().IsZero() {
   285  		return ctrl.Result{}, nil
   286  	}
   287  
   288  	// Determine if the infrastructure provider is ready.
   289  	ready, err := external.IsReady(infraConfig)
   290  	if err != nil {
   291  		return ctrl.Result{}, err
   292  	}
   293  	if ready && !m.Status.InfrastructureReady {
   294  		log.Info("Infrastructure provider has completed machine infrastructure provisioning and reports status.ready", infraConfig.GetKind(), klog.KObj(infraConfig))
   295  	}
   296  	m.Status.InfrastructureReady = ready
   297  
   298  	// Report a summary of current status of the infrastructure object defined for this machine.
   299  	conditions.SetMirror(m, clusterv1.InfrastructureReadyCondition,
   300  		conditions.UnstructuredGetter(infraConfig),
   301  		conditions.WithFallbackValue(ready, clusterv1.WaitingForInfrastructureFallbackReason, clusterv1.ConditionSeverityInfo, ""),
   302  	)
   303  
   304  	// If the infrastructure provider is not ready, return early.
   305  	if !ready {
   306  		log.Info("Waiting for infrastructure provider to create machine infrastructure and report status.ready", infraConfig.GetKind(), klog.KObj(infraConfig))
   307  		return ctrl.Result{}, nil
   308  	}
   309  
   310  	// Get Spec.ProviderID from the infrastructure provider.
   311  	var providerID string
   312  	if err := util.UnstructuredUnmarshalField(infraConfig, &providerID, "spec", "providerID"); err != nil {
   313  		return ctrl.Result{}, errors.Wrapf(err, "failed to retrieve Spec.ProviderID from infrastructure provider for Machine %q in namespace %q", m.Name, m.Namespace)
   314  	} else if providerID == "" {
   315  		return ctrl.Result{}, errors.Errorf("retrieved empty Spec.ProviderID from infrastructure provider for Machine %q in namespace %q", m.Name, m.Namespace)
   316  	}
   317  
   318  	// Get and set Status.Addresses from the infrastructure provider.
   319  	err = util.UnstructuredUnmarshalField(infraConfig, &m.Status.Addresses, "status", "addresses")
   320  	if err != nil && err != util.ErrUnstructuredFieldNotFound {
   321  		return ctrl.Result{}, errors.Wrapf(err, "failed to retrieve addresses from infrastructure provider for Machine %q in namespace %q", m.Name, m.Namespace)
   322  	}
   323  
   324  	// Get and set the failure domain from the infrastructure provider.
   325  	var failureDomain string
   326  	err = util.UnstructuredUnmarshalField(infraConfig, &failureDomain, "spec", "failureDomain")
   327  	switch {
   328  	case err == util.ErrUnstructuredFieldNotFound: // no-op
   329  	case err != nil:
   330  		return ctrl.Result{}, errors.Wrapf(err, "failed to retrieve failure domain from infrastructure provider for Machine %q in namespace %q", m.Name, m.Namespace)
   331  	default:
   332  		m.Spec.FailureDomain = ptr.To(failureDomain)
   333  	}
   334  
   335  	m.Spec.ProviderID = ptr.To(providerID)
   336  	return ctrl.Result{}, nil
   337  }
   338  
   339  func (r *Reconciler) reconcileCertificateExpiry(_ context.Context, s *scope) (ctrl.Result, error) {
   340  	m := s.machine
   341  	var annotations map[string]string
   342  
   343  	if !util.IsControlPlaneMachine(m) {
   344  		// If the machine is not a control plane machine, return early.
   345  		return ctrl.Result{}, nil
   346  	}
   347  
   348  	var expiryInfoFound bool
   349  
   350  	// Check for certificate expiry information in the machine annotation.
   351  	// This should take precedence over other information.
   352  	annotations = m.GetAnnotations()
   353  	if expiry, ok := annotations[clusterv1.MachineCertificatesExpiryDateAnnotation]; ok {
   354  		expiryInfoFound = true
   355  		expiryTime, err := time.Parse(time.RFC3339, expiry)
   356  		if err != nil {
   357  			return ctrl.Result{}, errors.Wrapf(err, "failed to reconcile certificates expiry: failed to parse expiry date from annotation on %s", klog.KObj(m))
   358  		}
   359  		expTime := metav1.NewTime(expiryTime)
   360  		m.Status.CertificatesExpiryDate = &expTime
   361  	} else if s.bootstrapConfig != nil {
   362  		// If the expiry information is not available on the machine annotation
   363  		// look for it on the bootstrap config.
   364  		annotations = s.bootstrapConfig.GetAnnotations()
   365  		if expiry, ok := annotations[clusterv1.MachineCertificatesExpiryDateAnnotation]; ok {
   366  			expiryInfoFound = true
   367  			expiryTime, err := time.Parse(time.RFC3339, expiry)
   368  			if err != nil {
   369  				return ctrl.Result{}, errors.Wrapf(err, "failed to reconcile certificates expiry: failed to parse expiry date from annotation on %s", klog.KObj(s.bootstrapConfig))
   370  			}
   371  			expTime := metav1.NewTime(expiryTime)
   372  			m.Status.CertificatesExpiryDate = &expTime
   373  		}
   374  	}
   375  
   376  	// If the certificates expiry information is not fond on the machine
   377  	// and on the bootstrap config then reset machine.status.certificatesExpiryDate.
   378  	if !expiryInfoFound {
   379  		m.Status.CertificatesExpiryDate = nil
   380  	}
   381  
   382  	return ctrl.Result{}, nil
   383  }
   384  
   385  // removeOnCreateOwnerRefs will remove any MachineSet or control plane owner references from passed objects.
   386  func removeOnCreateOwnerRefs(cluster *clusterv1.Cluster, m *clusterv1.Machine, obj *unstructured.Unstructured) error {
   387  	cpGVK := getControlPlaneGVKForMachine(cluster, m)
   388  	for _, owner := range obj.GetOwnerReferences() {
   389  		ownerGV, err := schema.ParseGroupVersion(owner.APIVersion)
   390  		if err != nil {
   391  			return errors.Wrapf(err, "Could not remove ownerReference %v from object %s/%s", owner.String(), obj.GetKind(), obj.GetName())
   392  		}
   393  		if (ownerGV.Group == clusterv1.GroupVersion.Group && owner.Kind == "MachineSet") ||
   394  			(cpGVK != nil && ownerGV.Group == cpGVK.GroupVersion().Group && owner.Kind == cpGVK.Kind) {
   395  			ownerRefs := util.RemoveOwnerRef(obj.GetOwnerReferences(), owner)
   396  			obj.SetOwnerReferences(ownerRefs)
   397  		}
   398  	}
   399  	return nil
   400  }
   401  
   402  // getControlPlaneGVKForMachine returns the Kind of the control plane in the Cluster associated with the Machine.
   403  // This function checks that the Machine is managed by a control plane, and then retrieves the Kind from the Cluster's
   404  // .spec.controlPlaneRef.
   405  func getControlPlaneGVKForMachine(cluster *clusterv1.Cluster, machine *clusterv1.Machine) *schema.GroupVersionKind {
   406  	if _, ok := machine.GetLabels()[clusterv1.MachineControlPlaneLabel]; ok {
   407  		if cluster.Spec.ControlPlaneRef != nil {
   408  			gvk := cluster.Spec.ControlPlaneRef.GroupVersionKind()
   409  			return &gvk
   410  		}
   411  	}
   412  	return nil
   413  }