sigs.k8s.io/cluster-api-provider-azure@v1.14.3/controllers/azurecluster_controller.go (about)

     1  /*
     2  Copyright 2019 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package controllers
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  
    23  	"github.com/pkg/errors"
    24  	corev1 "k8s.io/api/core/v1"
    25  	apierrors "k8s.io/apimachinery/pkg/api/errors"
    26  	"k8s.io/client-go/tools/record"
    27  	infrav1 "sigs.k8s.io/cluster-api-provider-azure/api/v1beta1"
    28  	"sigs.k8s.io/cluster-api-provider-azure/azure"
    29  	"sigs.k8s.io/cluster-api-provider-azure/azure/scope"
    30  	"sigs.k8s.io/cluster-api-provider-azure/pkg/coalescing"
    31  	"sigs.k8s.io/cluster-api-provider-azure/util/reconciler"
    32  	"sigs.k8s.io/cluster-api-provider-azure/util/tele"
    33  	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
    34  	"sigs.k8s.io/cluster-api/util"
    35  	"sigs.k8s.io/cluster-api/util/annotations"
    36  	"sigs.k8s.io/cluster-api/util/conditions"
    37  	"sigs.k8s.io/cluster-api/util/predicates"
    38  	ctrl "sigs.k8s.io/controller-runtime"
    39  	"sigs.k8s.io/controller-runtime/pkg/client"
    40  	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
    41  	"sigs.k8s.io/controller-runtime/pkg/handler"
    42  	"sigs.k8s.io/controller-runtime/pkg/reconcile"
    43  	"sigs.k8s.io/controller-runtime/pkg/source"
    44  )
    45  
    46  // AzureClusterReconciler reconciles an AzureCluster object.
    47  type AzureClusterReconciler struct {
    48  	client.Client
    49  	Recorder                  record.EventRecorder
    50  	Timeouts                  reconciler.Timeouts
    51  	WatchFilterValue          string
    52  	createAzureClusterService azureClusterServiceCreator
    53  }
    54  
    55  type azureClusterServiceCreator func(clusterScope *scope.ClusterScope) (*azureClusterService, error)
    56  
    57  // NewAzureClusterReconciler returns a new AzureClusterReconciler instance.
    58  func NewAzureClusterReconciler(client client.Client, recorder record.EventRecorder, timeouts reconciler.Timeouts, watchFilterValue string) *AzureClusterReconciler {
    59  	acr := &AzureClusterReconciler{
    60  		Client:           client,
    61  		Recorder:         recorder,
    62  		Timeouts:         timeouts,
    63  		WatchFilterValue: watchFilterValue,
    64  	}
    65  
    66  	acr.createAzureClusterService = newAzureClusterService
    67  
    68  	return acr
    69  }
    70  
    71  // SetupWithManager initializes this controller with a manager.
    72  func (acr *AzureClusterReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options Options) error {
    73  	ctx, log, done := tele.StartSpanWithLogger(ctx,
    74  		"controllers.AzureClusterReconciler.SetupWithManager",
    75  		tele.KVP("controller", "AzureCluster"),
    76  	)
    77  	defer done()
    78  
    79  	var r reconcile.Reconciler = acr
    80  	if options.Cache != nil {
    81  		r = coalescing.NewReconciler(acr, options.Cache, log)
    82  	}
    83  
    84  	c, err := ctrl.NewControllerManagedBy(mgr).
    85  		WithOptions(options.Options).
    86  		For(&infrav1.AzureCluster{}).
    87  		WithEventFilter(predicates.ResourceHasFilterLabel(log, acr.WatchFilterValue)).
    88  		WithEventFilter(predicates.ResourceIsNotExternallyManaged(log)).
    89  		Build(r)
    90  	if err != nil {
    91  		return errors.Wrap(err, "error creating controller")
    92  	}
    93  
    94  	// Add a watch on clusterv1.Cluster object for pause/unpause notifications.
    95  	if err = c.Watch(
    96  		source.Kind(mgr.GetCache(), &clusterv1.Cluster{}),
    97  		handler.EnqueueRequestsFromMapFunc(util.ClusterToInfrastructureMapFunc(ctx, infrav1.GroupVersion.WithKind(infrav1.AzureClusterKind), mgr.GetClient(), &infrav1.AzureCluster{})),
    98  		ClusterUpdatePauseChange(log),
    99  		predicates.ResourceHasFilterLabel(log, acr.WatchFilterValue),
   100  	); err != nil {
   101  		return errors.Wrap(err, "failed adding a watch for ready clusters")
   102  	}
   103  
   104  	return nil
   105  }
   106  
   107  // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=azureclusters,verbs=get;list;watch;create;update;patch;delete
   108  // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=azureclusters/status,verbs=get;update;patch
   109  // +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=clusters;clusters/status,verbs=get;list;watch
   110  // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=azuremachinetemplates;azuremachinetemplates/status,verbs=get;list;watch
   111  // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=azureclusteridentities;azureclusteridentities/status,verbs=get;list;watch;create;update;patch;delete
   112  // +kubebuilder:rbac:groups="",resources=namespaces,verbs=list;
   113  // +kubebuilder:rbac:groups=resources.azure.com,resources=resourcegroups,verbs=get;list;watch;create;update;patch;delete
   114  // +kubebuilder:rbac:groups=resources.azure.com,resources=resourcegroups/status,verbs=get;list;watch
   115  // +kubebuilder:rbac:groups=network.azure.com,resources=natgateways;bastionhosts;privateendpoints;virtualnetworks;virtualnetworkssubnets,verbs=get;list;watch;create;update;patch;delete
   116  // +kubebuilder:rbac:groups=network.azure.com,resources=natgateways/status;bastionhosts/status;privateendpoints/status;virtualnetworks/status;virtualnetworkssubnets/status,verbs=get;list;watch
   117  
   118  // Reconcile idempotently gets, creates, and updates a cluster.
   119  func (acr *AzureClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Result, reterr error) {
   120  	ctx, cancel := context.WithTimeout(ctx, acr.Timeouts.DefaultedLoopTimeout())
   121  	defer cancel()
   122  
   123  	ctx, log, done := tele.StartSpanWithLogger(
   124  		ctx,
   125  		"controllers.AzureClusterReconciler.Reconcile",
   126  		tele.KVP("namespace", req.Namespace),
   127  		tele.KVP("name", req.Name),
   128  		tele.KVP("kind", infrav1.AzureClusterKind),
   129  	)
   130  	defer done()
   131  
   132  	// Fetch the AzureCluster instance
   133  	azureCluster := &infrav1.AzureCluster{}
   134  	err := acr.Get(ctx, req.NamespacedName, azureCluster)
   135  	if err != nil {
   136  		if apierrors.IsNotFound(err) {
   137  			acr.Recorder.Eventf(azureCluster, corev1.EventTypeNormal, "AzureClusterObjectNotFound", err.Error())
   138  			log.Info("object was not found")
   139  			return reconcile.Result{}, nil
   140  		}
   141  		return reconcile.Result{}, err
   142  	}
   143  
   144  	// Fetch the Cluster.
   145  	cluster, err := util.GetOwnerCluster(ctx, acr.Client, azureCluster.ObjectMeta)
   146  	if err != nil {
   147  		return reconcile.Result{}, err
   148  	}
   149  	if cluster == nil {
   150  		acr.Recorder.Eventf(azureCluster, corev1.EventTypeNormal, "OwnerRefNotSet", "Cluster Controller has not yet set OwnerRef")
   151  		log.Info("Cluster Controller has not yet set OwnerRef")
   152  		return reconcile.Result{}, nil
   153  	}
   154  
   155  	log = log.WithValues("cluster", cluster.Name)
   156  
   157  	// Create the scope.
   158  	clusterScope, err := scope.NewClusterScope(ctx, scope.ClusterScopeParams{
   159  		Client:       acr.Client,
   160  		Cluster:      cluster,
   161  		AzureCluster: azureCluster,
   162  		Timeouts:     acr.Timeouts,
   163  	})
   164  	if err != nil {
   165  		err = errors.Wrap(err, "failed to create scope")
   166  		acr.Recorder.Eventf(azureCluster, corev1.EventTypeWarning, "CreateClusterScopeFailed", err.Error())
   167  		return reconcile.Result{}, err
   168  	}
   169  
   170  	// Always close the scope when exiting this function so we can persist any AzureMachine changes.
   171  	defer func() {
   172  		if err := clusterScope.Close(ctx); err != nil && reterr == nil {
   173  			reterr = err
   174  		}
   175  	}()
   176  
   177  	// Return early if the object or Cluster is paused.
   178  	if annotations.IsPaused(cluster, azureCluster) {
   179  		acr.Recorder.Eventf(azureCluster, corev1.EventTypeNormal, "ClusterPaused", "AzureCluster or linked Cluster is marked as paused. Won't reconcile normally")
   180  		log.Info("AzureCluster or linked Cluster is marked as paused. Won't reconcile normally")
   181  		return acr.reconcilePause(ctx, clusterScope)
   182  	}
   183  
   184  	if azureCluster.Spec.IdentityRef != nil {
   185  		err := EnsureClusterIdentity(ctx, acr.Client, azureCluster, azureCluster.Spec.IdentityRef, infrav1.ClusterFinalizer)
   186  		if err != nil {
   187  			return reconcile.Result{}, err
   188  		}
   189  	} else {
   190  		log.Info(fmt.Sprintf("WARNING, %s", deprecatedManagerCredsWarning))
   191  		acr.Recorder.Eventf(azureCluster, corev1.EventTypeWarning, "AzureClusterIdentity", deprecatedManagerCredsWarning)
   192  	}
   193  
   194  	// Handle deleted clusters
   195  	if !azureCluster.DeletionTimestamp.IsZero() {
   196  		return acr.reconcileDelete(ctx, clusterScope)
   197  	}
   198  
   199  	// Handle non-deleted clusters
   200  	return acr.reconcileNormal(ctx, clusterScope)
   201  }
   202  
   203  func (acr *AzureClusterReconciler) reconcileNormal(ctx context.Context, clusterScope *scope.ClusterScope) (reconcile.Result, error) {
   204  	ctx, log, done := tele.StartSpanWithLogger(ctx, "controllers.AzureClusterReconciler.reconcileNormal")
   205  	defer done()
   206  
   207  	log.Info("Reconciling AzureCluster")
   208  	azureCluster := clusterScope.AzureCluster
   209  
   210  	// Register our finalizer immediately to avoid orphaning Azure resources on delete
   211  	needsPatch := controllerutil.AddFinalizer(azureCluster, infrav1.ClusterFinalizer)
   212  	// Register the block-move annotation immediately to avoid moving un-paused ASO resources
   213  	needsPatch = AddBlockMoveAnnotation(azureCluster) || needsPatch
   214  	if needsPatch {
   215  		if err := clusterScope.PatchObject(ctx); err != nil {
   216  			return reconcile.Result{}, err
   217  		}
   218  	}
   219  
   220  	acs, err := acr.createAzureClusterService(clusterScope)
   221  	if err != nil {
   222  		return reconcile.Result{}, errors.Wrap(err, "failed to create a new AzureClusterReconciler")
   223  	}
   224  
   225  	if err := acs.Reconcile(ctx); err != nil {
   226  		// Handle terminal & transient errors
   227  		var reconcileError azure.ReconcileError
   228  		if errors.As(err, &reconcileError) {
   229  			if reconcileError.IsTerminal() {
   230  				acr.Recorder.Eventf(clusterScope.AzureCluster, corev1.EventTypeWarning, "ReconcileError", errors.Wrapf(err, "failed to reconcile AzureCluster").Error())
   231  				log.Error(err, "failed to reconcile AzureCluster", "name", clusterScope.ClusterName())
   232  				conditions.MarkFalse(azureCluster, infrav1.NetworkInfrastructureReadyCondition, infrav1.FailedReason, clusterv1.ConditionSeverityError, "")
   233  				return reconcile.Result{}, nil
   234  			}
   235  			if reconcileError.IsTransient() {
   236  				if azure.IsOperationNotDoneError(reconcileError) {
   237  					log.V(2).Info(fmt.Sprintf("AzureCluster reconcile not done: %s", reconcileError.Error()))
   238  				} else {
   239  					log.V(2).Info(fmt.Sprintf("transient failure to reconcile AzureCluster, retrying: %s", reconcileError.Error()))
   240  				}
   241  				return reconcile.Result{RequeueAfter: reconcileError.RequeueAfter()}, nil
   242  			}
   243  		}
   244  
   245  		wrappedErr := errors.Wrap(err, "failed to reconcile cluster services")
   246  		acr.Recorder.Eventf(azureCluster, corev1.EventTypeWarning, "ClusterReconcilerNormalFailed", wrappedErr.Error())
   247  		conditions.MarkFalse(azureCluster, infrav1.NetworkInfrastructureReadyCondition, infrav1.FailedReason, clusterv1.ConditionSeverityError, wrappedErr.Error())
   248  		return reconcile.Result{}, wrappedErr
   249  	}
   250  
   251  	// Set APIEndpoints so the Cluster API Cluster Controller can pull them
   252  	if azureCluster.Spec.ControlPlaneEndpoint.Host == "" {
   253  		azureCluster.Spec.ControlPlaneEndpoint.Host = clusterScope.APIServerHost()
   254  	}
   255  	if azureCluster.Spec.ControlPlaneEndpoint.Port == 0 {
   256  		azureCluster.Spec.ControlPlaneEndpoint.Port = clusterScope.APIServerPort()
   257  	}
   258  
   259  	// No errors, so mark us ready so the Cluster API Cluster Controller can pull it
   260  	azureCluster.Status.Ready = true
   261  	conditions.MarkTrue(azureCluster, infrav1.NetworkInfrastructureReadyCondition)
   262  
   263  	return reconcile.Result{}, nil
   264  }
   265  
   266  //nolint:unparam // Always returns an empty struct for reconcile.Result
   267  func (acr *AzureClusterReconciler) reconcilePause(ctx context.Context, clusterScope *scope.ClusterScope) (reconcile.Result, error) {
   268  	ctx, log, done := tele.StartSpanWithLogger(ctx, "controllers.AzureClusterReconciler.reconcilePause")
   269  	defer done()
   270  
   271  	log.Info("Reconciling AzureCluster pause")
   272  
   273  	acs, err := acr.createAzureClusterService(clusterScope)
   274  	if err != nil {
   275  		return reconcile.Result{}, errors.Wrap(err, "failed to create a new azureClusterService")
   276  	}
   277  
   278  	if err := acs.Pause(ctx); err != nil {
   279  		return reconcile.Result{}, errors.Wrap(err, "failed to pause cluster services")
   280  	}
   281  	RemoveBlockMoveAnnotation(clusterScope.AzureCluster)
   282  
   283  	return reconcile.Result{}, nil
   284  }
   285  
   286  func (acr *AzureClusterReconciler) reconcileDelete(ctx context.Context, clusterScope *scope.ClusterScope) (reconcile.Result, error) {
   287  	ctx, log, done := tele.StartSpanWithLogger(ctx, "controllers.AzureClusterReconciler.reconcileDelete")
   288  	defer done()
   289  
   290  	log.Info("Reconciling AzureCluster delete")
   291  
   292  	azureCluster := clusterScope.AzureCluster
   293  
   294  	acs, err := acr.createAzureClusterService(clusterScope)
   295  	if err != nil {
   296  		return reconcile.Result{}, errors.Wrap(err, "failed to create a new AzureClusterReconciler")
   297  	}
   298  
   299  	if err := acs.Delete(ctx); err != nil {
   300  		// Handle transient errors
   301  		var reconcileError azure.ReconcileError
   302  		if errors.As(err, &reconcileError) {
   303  			if reconcileError.IsTransient() {
   304  				if azure.IsOperationNotDoneError(reconcileError) {
   305  					log.V(2).Info(fmt.Sprintf("AzureCluster delete not done: %s", reconcileError.Error()))
   306  				} else {
   307  					log.V(2).Info("transient failure to delete AzureCluster, retrying")
   308  				}
   309  				return reconcile.Result{RequeueAfter: reconcileError.RequeueAfter()}, nil
   310  			}
   311  		}
   312  
   313  		wrappedErr := errors.Wrapf(err, "error deleting AzureCluster %s/%s", azureCluster.Namespace, azureCluster.Name)
   314  		acr.Recorder.Eventf(azureCluster, corev1.EventTypeWarning, "ClusterReconcilerDeleteFailed", wrappedErr.Error())
   315  		conditions.MarkFalse(azureCluster, infrav1.NetworkInfrastructureReadyCondition, clusterv1.DeletionFailedReason, clusterv1.ConditionSeverityWarning, err.Error())
   316  		return reconcile.Result{}, wrappedErr
   317  	}
   318  
   319  	// Cluster is deleted so remove the finalizer.
   320  	controllerutil.RemoveFinalizer(azureCluster, infrav1.ClusterFinalizer)
   321  
   322  	if azureCluster.Spec.IdentityRef != nil {
   323  		// Cluster is deleted so remove the identity finalizer.
   324  		err := RemoveClusterIdentityFinalizer(ctx, acr.Client, azureCluster, azureCluster.Spec.IdentityRef, infrav1.ClusterFinalizer)
   325  		if err != nil {
   326  			return reconcile.Result{}, err
   327  		}
   328  	}
   329  
   330  	return reconcile.Result{}, nil
   331  }