sigs.k8s.io/cluster-api-provider-azure@v1.17.0/controllers/azurecluster_controller.go (about) 1 /* 2 Copyright 2019 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package controllers 18 19 import ( 20 "context" 21 "fmt" 22 23 "github.com/pkg/errors" 24 corev1 "k8s.io/api/core/v1" 25 apierrors "k8s.io/apimachinery/pkg/api/errors" 26 "k8s.io/client-go/tools/record" 27 infrav1 "sigs.k8s.io/cluster-api-provider-azure/api/v1beta1" 28 "sigs.k8s.io/cluster-api-provider-azure/azure" 29 "sigs.k8s.io/cluster-api-provider-azure/azure/scope" 30 "sigs.k8s.io/cluster-api-provider-azure/pkg/coalescing" 31 "sigs.k8s.io/cluster-api-provider-azure/util/reconciler" 32 "sigs.k8s.io/cluster-api-provider-azure/util/tele" 33 clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" 34 "sigs.k8s.io/cluster-api/util" 35 "sigs.k8s.io/cluster-api/util/annotations" 36 "sigs.k8s.io/cluster-api/util/conditions" 37 "sigs.k8s.io/cluster-api/util/predicates" 38 ctrl "sigs.k8s.io/controller-runtime" 39 "sigs.k8s.io/controller-runtime/pkg/builder" 40 "sigs.k8s.io/controller-runtime/pkg/client" 41 "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" 42 "sigs.k8s.io/controller-runtime/pkg/handler" 43 "sigs.k8s.io/controller-runtime/pkg/reconcile" 44 ) 45 46 // AzureClusterReconciler reconciles an AzureCluster object. 47 type AzureClusterReconciler struct { 48 client.Client 49 Recorder record.EventRecorder 50 Timeouts reconciler.Timeouts 51 WatchFilterValue string 52 createAzureClusterService azureClusterServiceCreator 53 } 54 55 type azureClusterServiceCreator func(clusterScope *scope.ClusterScope) (*azureClusterService, error) 56 57 // NewAzureClusterReconciler returns a new AzureClusterReconciler instance. 58 func NewAzureClusterReconciler(client client.Client, recorder record.EventRecorder, timeouts reconciler.Timeouts, watchFilterValue string) *AzureClusterReconciler { 59 acr := &AzureClusterReconciler{ 60 Client: client, 61 Recorder: recorder, 62 Timeouts: timeouts, 63 WatchFilterValue: watchFilterValue, 64 } 65 66 acr.createAzureClusterService = newAzureClusterService 67 68 return acr 69 } 70 71 // SetupWithManager initializes this controller with a manager. 72 func (acr *AzureClusterReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options Options) error { 73 ctx, log, done := tele.StartSpanWithLogger(ctx, 74 "controllers.AzureClusterReconciler.SetupWithManager", 75 tele.KVP("controller", "AzureCluster"), 76 ) 77 defer done() 78 79 var r reconcile.Reconciler = acr 80 if options.Cache != nil { 81 r = coalescing.NewReconciler(acr, options.Cache, log) 82 } 83 84 return ctrl.NewControllerManagedBy(mgr). 85 WithOptions(options.Options). 86 For(&infrav1.AzureCluster{}). 87 WithEventFilter(predicates.ResourceHasFilterLabel(log, acr.WatchFilterValue)). 88 WithEventFilter(predicates.ResourceIsNotExternallyManaged(log)). 89 // Add a watch on clusterv1.Cluster object for pause/unpause notifications. 90 Watches( 91 &clusterv1.Cluster{}, 92 handler.EnqueueRequestsFromMapFunc(util.ClusterToInfrastructureMapFunc(ctx, infrav1.GroupVersion.WithKind(infrav1.AzureClusterKind), mgr.GetClient(), &infrav1.AzureCluster{})), 93 builder.WithPredicates( 94 ClusterUpdatePauseChange(log), 95 predicates.ResourceHasFilterLabel(log, acr.WatchFilterValue), 96 ), 97 ). 98 Complete(r) 99 } 100 101 // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=azureclusters,verbs=get;list;watch;create;update;patch;delete 102 // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=azureclusters/status,verbs=get;update;patch 103 // +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=clusters;clusters/status,verbs=get;list;watch 104 // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=azuremachinetemplates;azuremachinetemplates/status,verbs=get;list;watch 105 // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=azureclusteridentities;azureclusteridentities/status,verbs=get;list;watch;create;update;patch;delete 106 // +kubebuilder:rbac:groups="",resources=namespaces,verbs=list; 107 // +kubebuilder:rbac:groups=resources.azure.com,resources=resourcegroups,verbs=get;list;watch;create;update;patch;delete 108 // +kubebuilder:rbac:groups=resources.azure.com,resources=resourcegroups/status,verbs=get;list;watch 109 // +kubebuilder:rbac:groups=network.azure.com,resources=natgateways;bastionhosts;privateendpoints;virtualnetworks;virtualnetworkssubnets,verbs=get;list;watch;create;update;patch;delete 110 // +kubebuilder:rbac:groups=network.azure.com,resources=natgateways/status;bastionhosts/status;privateendpoints/status;virtualnetworks/status;virtualnetworkssubnets/status,verbs=get;list;watch 111 112 // Reconcile idempotently gets, creates, and updates a cluster. 113 func (acr *AzureClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Result, reterr error) { 114 ctx, cancel := context.WithTimeout(ctx, acr.Timeouts.DefaultedLoopTimeout()) 115 defer cancel() 116 117 ctx, log, done := tele.StartSpanWithLogger( 118 ctx, 119 "controllers.AzureClusterReconciler.Reconcile", 120 tele.KVP("namespace", req.Namespace), 121 tele.KVP("name", req.Name), 122 tele.KVP("kind", infrav1.AzureClusterKind), 123 ) 124 defer done() 125 126 // Fetch the AzureCluster instance 127 azureCluster := &infrav1.AzureCluster{} 128 err := acr.Get(ctx, req.NamespacedName, azureCluster) 129 if err != nil { 130 if apierrors.IsNotFound(err) { 131 acr.Recorder.Eventf(azureCluster, corev1.EventTypeNormal, "AzureClusterObjectNotFound", err.Error()) 132 log.Info("object was not found") 133 return reconcile.Result{}, nil 134 } 135 return reconcile.Result{}, err 136 } 137 138 // Fetch the Cluster. 139 cluster, err := util.GetOwnerCluster(ctx, acr.Client, azureCluster.ObjectMeta) 140 if err != nil { 141 return reconcile.Result{}, err 142 } 143 if cluster == nil { 144 acr.Recorder.Eventf(azureCluster, corev1.EventTypeNormal, "OwnerRefNotSet", "Cluster Controller has not yet set OwnerRef") 145 log.Info("Cluster Controller has not yet set OwnerRef") 146 return reconcile.Result{}, nil 147 } 148 149 log = log.WithValues("cluster", cluster.Name) 150 151 // Create the scope. 152 clusterScope, err := scope.NewClusterScope(ctx, scope.ClusterScopeParams{ 153 Client: acr.Client, 154 Cluster: cluster, 155 AzureCluster: azureCluster, 156 Timeouts: acr.Timeouts, 157 }) 158 if err != nil { 159 err = errors.Wrap(err, "failed to create scope") 160 acr.Recorder.Eventf(azureCluster, corev1.EventTypeWarning, "CreateClusterScopeFailed", err.Error()) 161 return reconcile.Result{}, err 162 } 163 164 // Always close the scope when exiting this function so we can persist any AzureMachine changes. 165 defer func() { 166 if err := clusterScope.Close(ctx); err != nil && reterr == nil { 167 reterr = err 168 } 169 }() 170 171 // Return early if the object or Cluster is paused. 172 if annotations.IsPaused(cluster, azureCluster) { 173 acr.Recorder.Eventf(azureCluster, corev1.EventTypeNormal, "ClusterPaused", "AzureCluster or linked Cluster is marked as paused. Won't reconcile normally") 174 log.Info("AzureCluster or linked Cluster is marked as paused. Won't reconcile normally") 175 return acr.reconcilePause(ctx, clusterScope) 176 } 177 178 if azureCluster.Spec.IdentityRef != nil { 179 err := EnsureClusterIdentity(ctx, acr.Client, azureCluster, azureCluster.Spec.IdentityRef, infrav1.ClusterFinalizer) 180 if err != nil { 181 return reconcile.Result{}, err 182 } 183 } else { 184 log.Info(fmt.Sprintf("WARNING, %s", deprecatedManagerCredsWarning)) 185 acr.Recorder.Eventf(azureCluster, corev1.EventTypeWarning, "AzureClusterIdentity", deprecatedManagerCredsWarning) 186 } 187 188 // Handle deleted clusters 189 if !azureCluster.DeletionTimestamp.IsZero() { 190 return acr.reconcileDelete(ctx, clusterScope) 191 } 192 193 // Handle non-deleted clusters 194 return acr.reconcileNormal(ctx, clusterScope) 195 } 196 197 func (acr *AzureClusterReconciler) reconcileNormal(ctx context.Context, clusterScope *scope.ClusterScope) (reconcile.Result, error) { 198 ctx, log, done := tele.StartSpanWithLogger(ctx, "controllers.AzureClusterReconciler.reconcileNormal") 199 defer done() 200 201 log.Info("Reconciling AzureCluster") 202 azureCluster := clusterScope.AzureCluster 203 204 // Register our finalizer immediately to avoid orphaning Azure resources on delete 205 needsPatch := controllerutil.AddFinalizer(azureCluster, infrav1.ClusterFinalizer) 206 // Register the block-move annotation immediately to avoid moving un-paused ASO resources 207 needsPatch = AddBlockMoveAnnotation(azureCluster) || needsPatch 208 if needsPatch { 209 if err := clusterScope.PatchObject(ctx); err != nil { 210 return reconcile.Result{}, err 211 } 212 } 213 214 acs, err := acr.createAzureClusterService(clusterScope) 215 if err != nil { 216 return reconcile.Result{}, errors.Wrap(err, "failed to create a new AzureClusterReconciler") 217 } 218 219 if err := acs.Reconcile(ctx); err != nil { 220 // Handle terminal & transient errors 221 var reconcileError azure.ReconcileError 222 if errors.As(err, &reconcileError) { 223 if reconcileError.IsTerminal() { 224 acr.Recorder.Eventf(clusterScope.AzureCluster, corev1.EventTypeWarning, "ReconcileError", errors.Wrapf(err, "failed to reconcile AzureCluster").Error()) 225 log.Error(err, "failed to reconcile AzureCluster", "name", clusterScope.ClusterName()) 226 conditions.MarkFalse(azureCluster, infrav1.NetworkInfrastructureReadyCondition, infrav1.FailedReason, clusterv1.ConditionSeverityError, "") 227 return reconcile.Result{}, nil 228 } 229 if reconcileError.IsTransient() { 230 if azure.IsOperationNotDoneError(reconcileError) { 231 log.V(2).Info(fmt.Sprintf("AzureCluster reconcile not done: %s", reconcileError.Error())) 232 } else { 233 log.V(2).Info(fmt.Sprintf("transient failure to reconcile AzureCluster, retrying: %s", reconcileError.Error())) 234 } 235 return reconcile.Result{RequeueAfter: reconcileError.RequeueAfter()}, nil 236 } 237 } 238 239 wrappedErr := errors.Wrap(err, "failed to reconcile cluster services") 240 acr.Recorder.Eventf(azureCluster, corev1.EventTypeWarning, "ClusterReconcilerNormalFailed", wrappedErr.Error()) 241 conditions.MarkFalse(azureCluster, infrav1.NetworkInfrastructureReadyCondition, infrav1.FailedReason, clusterv1.ConditionSeverityError, wrappedErr.Error()) 242 return reconcile.Result{}, wrappedErr 243 } 244 245 // Set APIEndpoints so the Cluster API Cluster Controller can pull them 246 if azureCluster.Spec.ControlPlaneEndpoint.Host == "" { 247 azureCluster.Spec.ControlPlaneEndpoint.Host = clusterScope.APIServerHost() 248 } 249 if azureCluster.Spec.ControlPlaneEndpoint.Port == 0 { 250 azureCluster.Spec.ControlPlaneEndpoint.Port = clusterScope.APIServerPort() 251 } 252 253 // No errors, so mark us ready so the Cluster API Cluster Controller can pull it 254 azureCluster.Status.Ready = true 255 conditions.MarkTrue(azureCluster, infrav1.NetworkInfrastructureReadyCondition) 256 257 return reconcile.Result{}, nil 258 } 259 260 //nolint:unparam // Always returns an empty struct for reconcile.Result 261 func (acr *AzureClusterReconciler) reconcilePause(ctx context.Context, clusterScope *scope.ClusterScope) (reconcile.Result, error) { 262 ctx, log, done := tele.StartSpanWithLogger(ctx, "controllers.AzureClusterReconciler.reconcilePause") 263 defer done() 264 265 log.Info("Reconciling AzureCluster pause") 266 267 acs, err := acr.createAzureClusterService(clusterScope) 268 if err != nil { 269 return reconcile.Result{}, errors.Wrap(err, "failed to create a new azureClusterService") 270 } 271 272 if err := acs.Pause(ctx); err != nil { 273 return reconcile.Result{}, errors.Wrap(err, "failed to pause cluster services") 274 } 275 RemoveBlockMoveAnnotation(clusterScope.AzureCluster) 276 277 return reconcile.Result{}, nil 278 } 279 280 func (acr *AzureClusterReconciler) reconcileDelete(ctx context.Context, clusterScope *scope.ClusterScope) (reconcile.Result, error) { 281 ctx, log, done := tele.StartSpanWithLogger(ctx, "controllers.AzureClusterReconciler.reconcileDelete") 282 defer done() 283 284 log.Info("Reconciling AzureCluster delete") 285 286 azureCluster := clusterScope.AzureCluster 287 288 acs, err := acr.createAzureClusterService(clusterScope) 289 if err != nil { 290 return reconcile.Result{}, errors.Wrap(err, "failed to create a new AzureClusterReconciler") 291 } 292 293 if err := acs.Delete(ctx); err != nil { 294 // Handle transient errors 295 var reconcileError azure.ReconcileError 296 if errors.As(err, &reconcileError) { 297 if reconcileError.IsTransient() { 298 if azure.IsOperationNotDoneError(reconcileError) { 299 log.V(2).Info(fmt.Sprintf("AzureCluster delete not done: %s", reconcileError.Error())) 300 } else { 301 log.V(2).Info("transient failure to delete AzureCluster, retrying") 302 } 303 return reconcile.Result{RequeueAfter: reconcileError.RequeueAfter()}, nil 304 } 305 } 306 307 wrappedErr := errors.Wrapf(err, "error deleting AzureCluster %s/%s", azureCluster.Namespace, azureCluster.Name) 308 acr.Recorder.Eventf(azureCluster, corev1.EventTypeWarning, "ClusterReconcilerDeleteFailed", wrappedErr.Error()) 309 conditions.MarkFalse(azureCluster, infrav1.NetworkInfrastructureReadyCondition, clusterv1.DeletionFailedReason, clusterv1.ConditionSeverityWarning, err.Error()) 310 return reconcile.Result{}, wrappedErr 311 } 312 313 // Cluster is deleted so remove the finalizer. 314 controllerutil.RemoveFinalizer(azureCluster, infrav1.ClusterFinalizer) 315 316 if azureCluster.Spec.IdentityRef != nil { 317 // Cluster is deleted so remove the identity finalizer. 318 err := RemoveClusterIdentityFinalizer(ctx, acr.Client, azureCluster, azureCluster.Spec.IdentityRef, infrav1.ClusterFinalizer) 319 if err != nil { 320 return reconcile.Result{}, err 321 } 322 } 323 324 return reconcile.Result{}, nil 325 }