sigs.k8s.io/cluster-api-provider-azure@v1.17.0/controllers/azurecluster_controller.go (about)

     1  /*
     2  Copyright 2019 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package controllers
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  
    23  	"github.com/pkg/errors"
    24  	corev1 "k8s.io/api/core/v1"
    25  	apierrors "k8s.io/apimachinery/pkg/api/errors"
    26  	"k8s.io/client-go/tools/record"
    27  	infrav1 "sigs.k8s.io/cluster-api-provider-azure/api/v1beta1"
    28  	"sigs.k8s.io/cluster-api-provider-azure/azure"
    29  	"sigs.k8s.io/cluster-api-provider-azure/azure/scope"
    30  	"sigs.k8s.io/cluster-api-provider-azure/pkg/coalescing"
    31  	"sigs.k8s.io/cluster-api-provider-azure/util/reconciler"
    32  	"sigs.k8s.io/cluster-api-provider-azure/util/tele"
    33  	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
    34  	"sigs.k8s.io/cluster-api/util"
    35  	"sigs.k8s.io/cluster-api/util/annotations"
    36  	"sigs.k8s.io/cluster-api/util/conditions"
    37  	"sigs.k8s.io/cluster-api/util/predicates"
    38  	ctrl "sigs.k8s.io/controller-runtime"
    39  	"sigs.k8s.io/controller-runtime/pkg/builder"
    40  	"sigs.k8s.io/controller-runtime/pkg/client"
    41  	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
    42  	"sigs.k8s.io/controller-runtime/pkg/handler"
    43  	"sigs.k8s.io/controller-runtime/pkg/reconcile"
    44  )
    45  
// AzureClusterReconciler reconciles an AzureCluster object.
type AzureClusterReconciler struct {
	// Client is the embedded Kubernetes client used to read objects and
	// handed to the cluster scope and identity helpers.
	client.Client
	// Recorder emits Kubernetes events against the AzureCluster being reconciled.
	Recorder                  record.EventRecorder
	// Timeouts supplies the per-reconcile loop timeout and is passed on to the cluster scope.
	Timeouts                  reconciler.Timeouts
	// WatchFilterValue is the value given to the filter-label predicates in SetupWithManager.
	WatchFilterValue          string
	// createAzureClusterService builds the service that performs the actual
	// reconcile, pause, and delete work for a cluster scope.
	// NOTE(review): presumably a field so tests can substitute a fake — confirm.
	createAzureClusterService azureClusterServiceCreator
}
    54  
// azureClusterServiceCreator builds an azureClusterService for the given
// cluster scope, or returns an error when the service cannot be created.
type azureClusterServiceCreator func(clusterScope *scope.ClusterScope) (*azureClusterService, error)
    56  
    57  // NewAzureClusterReconciler returns a new AzureClusterReconciler instance.
    58  func NewAzureClusterReconciler(client client.Client, recorder record.EventRecorder, timeouts reconciler.Timeouts, watchFilterValue string) *AzureClusterReconciler {
    59  	acr := &AzureClusterReconciler{
    60  		Client:           client,
    61  		Recorder:         recorder,
    62  		Timeouts:         timeouts,
    63  		WatchFilterValue: watchFilterValue,
    64  	}
    65  
    66  	acr.createAzureClusterService = newAzureClusterService
    67  
    68  	return acr
    69  }
    70  
    71  // SetupWithManager initializes this controller with a manager.
    72  func (acr *AzureClusterReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options Options) error {
    73  	ctx, log, done := tele.StartSpanWithLogger(ctx,
    74  		"controllers.AzureClusterReconciler.SetupWithManager",
    75  		tele.KVP("controller", "AzureCluster"),
    76  	)
    77  	defer done()
    78  
    79  	var r reconcile.Reconciler = acr
    80  	if options.Cache != nil {
    81  		r = coalescing.NewReconciler(acr, options.Cache, log)
    82  	}
    83  
    84  	return ctrl.NewControllerManagedBy(mgr).
    85  		WithOptions(options.Options).
    86  		For(&infrav1.AzureCluster{}).
    87  		WithEventFilter(predicates.ResourceHasFilterLabel(log, acr.WatchFilterValue)).
    88  		WithEventFilter(predicates.ResourceIsNotExternallyManaged(log)).
    89  		// Add a watch on clusterv1.Cluster object for pause/unpause notifications.
    90  		Watches(
    91  			&clusterv1.Cluster{},
    92  			handler.EnqueueRequestsFromMapFunc(util.ClusterToInfrastructureMapFunc(ctx, infrav1.GroupVersion.WithKind(infrav1.AzureClusterKind), mgr.GetClient(), &infrav1.AzureCluster{})),
    93  			builder.WithPredicates(
    94  				ClusterUpdatePauseChange(log),
    95  				predicates.ResourceHasFilterLabel(log, acr.WatchFilterValue),
    96  			),
    97  		).
    98  		Complete(r)
    99  }
   100  
   101  // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=azureclusters,verbs=get;list;watch;create;update;patch;delete
   102  // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=azureclusters/status,verbs=get;update;patch
   103  // +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=clusters;clusters/status,verbs=get;list;watch
   104  // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=azuremachinetemplates;azuremachinetemplates/status,verbs=get;list;watch
   105  // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=azureclusteridentities;azureclusteridentities/status,verbs=get;list;watch;create;update;patch;delete
   106  // +kubebuilder:rbac:groups="",resources=namespaces,verbs=list;
   107  // +kubebuilder:rbac:groups=resources.azure.com,resources=resourcegroups,verbs=get;list;watch;create;update;patch;delete
   108  // +kubebuilder:rbac:groups=resources.azure.com,resources=resourcegroups/status,verbs=get;list;watch
   109  // +kubebuilder:rbac:groups=network.azure.com,resources=natgateways;bastionhosts;privateendpoints;virtualnetworks;virtualnetworkssubnets,verbs=get;list;watch;create;update;patch;delete
   110  // +kubebuilder:rbac:groups=network.azure.com,resources=natgateways/status;bastionhosts/status;privateendpoints/status;virtualnetworks/status;virtualnetworkssubnets/status,verbs=get;list;watch
   111  
   112  // Reconcile idempotently gets, creates, and updates a cluster.
   113  func (acr *AzureClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Result, reterr error) {
   114  	ctx, cancel := context.WithTimeout(ctx, acr.Timeouts.DefaultedLoopTimeout())
   115  	defer cancel()
   116  
   117  	ctx, log, done := tele.StartSpanWithLogger(
   118  		ctx,
   119  		"controllers.AzureClusterReconciler.Reconcile",
   120  		tele.KVP("namespace", req.Namespace),
   121  		tele.KVP("name", req.Name),
   122  		tele.KVP("kind", infrav1.AzureClusterKind),
   123  	)
   124  	defer done()
   125  
   126  	// Fetch the AzureCluster instance
   127  	azureCluster := &infrav1.AzureCluster{}
   128  	err := acr.Get(ctx, req.NamespacedName, azureCluster)
   129  	if err != nil {
   130  		if apierrors.IsNotFound(err) {
   131  			acr.Recorder.Eventf(azureCluster, corev1.EventTypeNormal, "AzureClusterObjectNotFound", err.Error())
   132  			log.Info("object was not found")
   133  			return reconcile.Result{}, nil
   134  		}
   135  		return reconcile.Result{}, err
   136  	}
   137  
   138  	// Fetch the Cluster.
   139  	cluster, err := util.GetOwnerCluster(ctx, acr.Client, azureCluster.ObjectMeta)
   140  	if err != nil {
   141  		return reconcile.Result{}, err
   142  	}
   143  	if cluster == nil {
   144  		acr.Recorder.Eventf(azureCluster, corev1.EventTypeNormal, "OwnerRefNotSet", "Cluster Controller has not yet set OwnerRef")
   145  		log.Info("Cluster Controller has not yet set OwnerRef")
   146  		return reconcile.Result{}, nil
   147  	}
   148  
   149  	log = log.WithValues("cluster", cluster.Name)
   150  
   151  	// Create the scope.
   152  	clusterScope, err := scope.NewClusterScope(ctx, scope.ClusterScopeParams{
   153  		Client:       acr.Client,
   154  		Cluster:      cluster,
   155  		AzureCluster: azureCluster,
   156  		Timeouts:     acr.Timeouts,
   157  	})
   158  	if err != nil {
   159  		err = errors.Wrap(err, "failed to create scope")
   160  		acr.Recorder.Eventf(azureCluster, corev1.EventTypeWarning, "CreateClusterScopeFailed", err.Error())
   161  		return reconcile.Result{}, err
   162  	}
   163  
   164  	// Always close the scope when exiting this function so we can persist any AzureMachine changes.
   165  	defer func() {
   166  		if err := clusterScope.Close(ctx); err != nil && reterr == nil {
   167  			reterr = err
   168  		}
   169  	}()
   170  
   171  	// Return early if the object or Cluster is paused.
   172  	if annotations.IsPaused(cluster, azureCluster) {
   173  		acr.Recorder.Eventf(azureCluster, corev1.EventTypeNormal, "ClusterPaused", "AzureCluster or linked Cluster is marked as paused. Won't reconcile normally")
   174  		log.Info("AzureCluster or linked Cluster is marked as paused. Won't reconcile normally")
   175  		return acr.reconcilePause(ctx, clusterScope)
   176  	}
   177  
   178  	if azureCluster.Spec.IdentityRef != nil {
   179  		err := EnsureClusterIdentity(ctx, acr.Client, azureCluster, azureCluster.Spec.IdentityRef, infrav1.ClusterFinalizer)
   180  		if err != nil {
   181  			return reconcile.Result{}, err
   182  		}
   183  	} else {
   184  		log.Info(fmt.Sprintf("WARNING, %s", deprecatedManagerCredsWarning))
   185  		acr.Recorder.Eventf(azureCluster, corev1.EventTypeWarning, "AzureClusterIdentity", deprecatedManagerCredsWarning)
   186  	}
   187  
   188  	// Handle deleted clusters
   189  	if !azureCluster.DeletionTimestamp.IsZero() {
   190  		return acr.reconcileDelete(ctx, clusterScope)
   191  	}
   192  
   193  	// Handle non-deleted clusters
   194  	return acr.reconcileNormal(ctx, clusterScope)
   195  }
   196  
   197  func (acr *AzureClusterReconciler) reconcileNormal(ctx context.Context, clusterScope *scope.ClusterScope) (reconcile.Result, error) {
   198  	ctx, log, done := tele.StartSpanWithLogger(ctx, "controllers.AzureClusterReconciler.reconcileNormal")
   199  	defer done()
   200  
   201  	log.Info("Reconciling AzureCluster")
   202  	azureCluster := clusterScope.AzureCluster
   203  
   204  	// Register our finalizer immediately to avoid orphaning Azure resources on delete
   205  	needsPatch := controllerutil.AddFinalizer(azureCluster, infrav1.ClusterFinalizer)
   206  	// Register the block-move annotation immediately to avoid moving un-paused ASO resources
   207  	needsPatch = AddBlockMoveAnnotation(azureCluster) || needsPatch
   208  	if needsPatch {
   209  		if err := clusterScope.PatchObject(ctx); err != nil {
   210  			return reconcile.Result{}, err
   211  		}
   212  	}
   213  
   214  	acs, err := acr.createAzureClusterService(clusterScope)
   215  	if err != nil {
   216  		return reconcile.Result{}, errors.Wrap(err, "failed to create a new AzureClusterReconciler")
   217  	}
   218  
   219  	if err := acs.Reconcile(ctx); err != nil {
   220  		// Handle terminal & transient errors
   221  		var reconcileError azure.ReconcileError
   222  		if errors.As(err, &reconcileError) {
   223  			if reconcileError.IsTerminal() {
   224  				acr.Recorder.Eventf(clusterScope.AzureCluster, corev1.EventTypeWarning, "ReconcileError", errors.Wrapf(err, "failed to reconcile AzureCluster").Error())
   225  				log.Error(err, "failed to reconcile AzureCluster", "name", clusterScope.ClusterName())
   226  				conditions.MarkFalse(azureCluster, infrav1.NetworkInfrastructureReadyCondition, infrav1.FailedReason, clusterv1.ConditionSeverityError, "")
   227  				return reconcile.Result{}, nil
   228  			}
   229  			if reconcileError.IsTransient() {
   230  				if azure.IsOperationNotDoneError(reconcileError) {
   231  					log.V(2).Info(fmt.Sprintf("AzureCluster reconcile not done: %s", reconcileError.Error()))
   232  				} else {
   233  					log.V(2).Info(fmt.Sprintf("transient failure to reconcile AzureCluster, retrying: %s", reconcileError.Error()))
   234  				}
   235  				return reconcile.Result{RequeueAfter: reconcileError.RequeueAfter()}, nil
   236  			}
   237  		}
   238  
   239  		wrappedErr := errors.Wrap(err, "failed to reconcile cluster services")
   240  		acr.Recorder.Eventf(azureCluster, corev1.EventTypeWarning, "ClusterReconcilerNormalFailed", wrappedErr.Error())
   241  		conditions.MarkFalse(azureCluster, infrav1.NetworkInfrastructureReadyCondition, infrav1.FailedReason, clusterv1.ConditionSeverityError, wrappedErr.Error())
   242  		return reconcile.Result{}, wrappedErr
   243  	}
   244  
   245  	// Set APIEndpoints so the Cluster API Cluster Controller can pull them
   246  	if azureCluster.Spec.ControlPlaneEndpoint.Host == "" {
   247  		azureCluster.Spec.ControlPlaneEndpoint.Host = clusterScope.APIServerHost()
   248  	}
   249  	if azureCluster.Spec.ControlPlaneEndpoint.Port == 0 {
   250  		azureCluster.Spec.ControlPlaneEndpoint.Port = clusterScope.APIServerPort()
   251  	}
   252  
   253  	// No errors, so mark us ready so the Cluster API Cluster Controller can pull it
   254  	azureCluster.Status.Ready = true
   255  	conditions.MarkTrue(azureCluster, infrav1.NetworkInfrastructureReadyCondition)
   256  
   257  	return reconcile.Result{}, nil
   258  }
   259  
   260  //nolint:unparam // Always returns an empty struct for reconcile.Result
   261  func (acr *AzureClusterReconciler) reconcilePause(ctx context.Context, clusterScope *scope.ClusterScope) (reconcile.Result, error) {
   262  	ctx, log, done := tele.StartSpanWithLogger(ctx, "controllers.AzureClusterReconciler.reconcilePause")
   263  	defer done()
   264  
   265  	log.Info("Reconciling AzureCluster pause")
   266  
   267  	acs, err := acr.createAzureClusterService(clusterScope)
   268  	if err != nil {
   269  		return reconcile.Result{}, errors.Wrap(err, "failed to create a new azureClusterService")
   270  	}
   271  
   272  	if err := acs.Pause(ctx); err != nil {
   273  		return reconcile.Result{}, errors.Wrap(err, "failed to pause cluster services")
   274  	}
   275  	RemoveBlockMoveAnnotation(clusterScope.AzureCluster)
   276  
   277  	return reconcile.Result{}, nil
   278  }
   279  
   280  func (acr *AzureClusterReconciler) reconcileDelete(ctx context.Context, clusterScope *scope.ClusterScope) (reconcile.Result, error) {
   281  	ctx, log, done := tele.StartSpanWithLogger(ctx, "controllers.AzureClusterReconciler.reconcileDelete")
   282  	defer done()
   283  
   284  	log.Info("Reconciling AzureCluster delete")
   285  
   286  	azureCluster := clusterScope.AzureCluster
   287  
   288  	acs, err := acr.createAzureClusterService(clusterScope)
   289  	if err != nil {
   290  		return reconcile.Result{}, errors.Wrap(err, "failed to create a new AzureClusterReconciler")
   291  	}
   292  
   293  	if err := acs.Delete(ctx); err != nil {
   294  		// Handle transient errors
   295  		var reconcileError azure.ReconcileError
   296  		if errors.As(err, &reconcileError) {
   297  			if reconcileError.IsTransient() {
   298  				if azure.IsOperationNotDoneError(reconcileError) {
   299  					log.V(2).Info(fmt.Sprintf("AzureCluster delete not done: %s", reconcileError.Error()))
   300  				} else {
   301  					log.V(2).Info("transient failure to delete AzureCluster, retrying")
   302  				}
   303  				return reconcile.Result{RequeueAfter: reconcileError.RequeueAfter()}, nil
   304  			}
   305  		}
   306  
   307  		wrappedErr := errors.Wrapf(err, "error deleting AzureCluster %s/%s", azureCluster.Namespace, azureCluster.Name)
   308  		acr.Recorder.Eventf(azureCluster, corev1.EventTypeWarning, "ClusterReconcilerDeleteFailed", wrappedErr.Error())
   309  		conditions.MarkFalse(azureCluster, infrav1.NetworkInfrastructureReadyCondition, clusterv1.DeletionFailedReason, clusterv1.ConditionSeverityWarning, err.Error())
   310  		return reconcile.Result{}, wrappedErr
   311  	}
   312  
   313  	// Cluster is deleted so remove the finalizer.
   314  	controllerutil.RemoveFinalizer(azureCluster, infrav1.ClusterFinalizer)
   315  
   316  	if azureCluster.Spec.IdentityRef != nil {
   317  		// Cluster is deleted so remove the identity finalizer.
   318  		err := RemoveClusterIdentityFinalizer(ctx, acr.Client, azureCluster, azureCluster.Spec.IdentityRef, infrav1.ClusterFinalizer)
   319  		if err != nil {
   320  			return reconcile.Result{}, err
   321  		}
   322  	}
   323  
   324  	return reconcile.Result{}, nil
   325  }