sigs.k8s.io/cluster-api-provider-aws@v1.5.5/controllers/awscluster_controller.go (about) 1 /* 2 Copyright 2019 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package controllers 18 19 import ( 20 "context" 21 "fmt" 22 "net" 23 "time" 24 25 "github.com/go-logr/logr" 26 "github.com/google/go-cmp/cmp" 27 "github.com/pkg/errors" 28 apierrors "k8s.io/apimachinery/pkg/api/errors" 29 "k8s.io/apimachinery/pkg/types" 30 "k8s.io/client-go/tools/record" 31 ctrl "sigs.k8s.io/controller-runtime" 32 "sigs.k8s.io/controller-runtime/pkg/client" 33 "sigs.k8s.io/controller-runtime/pkg/controller" 34 "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" 35 "sigs.k8s.io/controller-runtime/pkg/event" 36 "sigs.k8s.io/controller-runtime/pkg/handler" 37 "sigs.k8s.io/controller-runtime/pkg/predicate" 38 "sigs.k8s.io/controller-runtime/pkg/reconcile" 39 "sigs.k8s.io/controller-runtime/pkg/source" 40 41 infrav1 "sigs.k8s.io/cluster-api-provider-aws/api/v1beta1" 42 "sigs.k8s.io/cluster-api-provider-aws/feature" 43 "sigs.k8s.io/cluster-api-provider-aws/pkg/cloud/scope" 44 "sigs.k8s.io/cluster-api-provider-aws/pkg/cloud/services" 45 "sigs.k8s.io/cluster-api-provider-aws/pkg/cloud/services/ec2" 46 "sigs.k8s.io/cluster-api-provider-aws/pkg/cloud/services/elb" 47 "sigs.k8s.io/cluster-api-provider-aws/pkg/cloud/services/gc" 48 "sigs.k8s.io/cluster-api-provider-aws/pkg/cloud/services/instancestate" 49 "sigs.k8s.io/cluster-api-provider-aws/pkg/cloud/services/network" 50 "sigs.k8s.io/cluster-api-provider-aws/pkg/cloud/services/s3" 51 "sigs.k8s.io/cluster-api-provider-aws/pkg/cloud/services/securitygroup" 52 infrautilconditions "sigs.k8s.io/cluster-api-provider-aws/util/conditions" 53 clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" 54 "sigs.k8s.io/cluster-api/util" 55 capiannotations "sigs.k8s.io/cluster-api/util/annotations" 56 "sigs.k8s.io/cluster-api/util/conditions" 57 "sigs.k8s.io/cluster-api/util/patch" 58 "sigs.k8s.io/cluster-api/util/predicates" 59 ) 60 61 var defaultAWSSecurityGroupRoles = []infrav1.SecurityGroupRole{ 62 infrav1.SecurityGroupAPIServerLB, 63 infrav1.SecurityGroupLB, 64 infrav1.SecurityGroupControlPlane, 65 infrav1.SecurityGroupNode, 66 } 67 68 // AWSClusterReconciler reconciles a AwsCluster object. 69 type AWSClusterReconciler struct { 70 client.Client 71 Recorder record.EventRecorder 72 ec2ServiceFactory func(scope.EC2Scope) services.EC2Interface 73 networkServiceFactory func(scope.ClusterScope) services.NetworkInterface 74 elbServiceFactory func(scope.ELBScope) services.ELBInterface 75 securityGroupFactory func(scope.ClusterScope) services.SecurityGroupInterface 76 Endpoints []scope.ServiceEndpoint 77 WatchFilterValue string 78 ExternalResourceGC bool 79 } 80 81 // getEC2Service factory func is added for testing purpose so that we can inject mocked EC2Service to the AWSClusterReconciler. 82 func (r *AWSClusterReconciler) getEC2Service(scope scope.EC2Scope) services.EC2Interface { 83 if r.ec2ServiceFactory != nil { 84 return r.ec2ServiceFactory(scope) 85 } 86 return ec2.NewService(scope) 87 } 88 89 // getELBService factory func is added for testing purpose so that we can inject mocked ELBService to the AWSClusterReconciler. 90 func (r *AWSClusterReconciler) getELBService(scope scope.ELBScope) services.ELBInterface { 91 if r.elbServiceFactory != nil { 92 return r.elbServiceFactory(scope) 93 } 94 return elb.NewService(scope) 95 } 96 97 // getNetworkService factory func is added for testing purpose so that we can inject mocked NetworkService to the AWSClusterReconciler. 98 func (r *AWSClusterReconciler) getNetworkService(scope scope.ClusterScope) services.NetworkInterface { 99 if r.networkServiceFactory != nil { 100 return r.networkServiceFactory(scope) 101 } 102 return network.NewService(&scope) 103 } 104 105 // securityGroupRolesForCluster returns the security group roles determined by the cluster configuration. 106 func securityGroupRolesForCluster(scope scope.ClusterScope) []infrav1.SecurityGroupRole { 107 // Copy to ensure we do not modify the package-level variable. 108 roles := make([]infrav1.SecurityGroupRole, len(defaultAWSSecurityGroupRoles)) 109 copy(roles, defaultAWSSecurityGroupRoles) 110 111 if scope.Bastion().Enabled { 112 roles = append(roles, infrav1.SecurityGroupBastion) 113 } 114 return roles 115 } 116 117 // getSecurityGroupService factory func is added for testing purpose so that we can inject mocked SecurityGroupService to the AWSClusterReconciler. 118 func (r *AWSClusterReconciler) getSecurityGroupService(scope scope.ClusterScope) services.SecurityGroupInterface { 119 if r.securityGroupFactory != nil { 120 return r.securityGroupFactory(scope) 121 } 122 return securitygroup.NewService(&scope, securityGroupRolesForCluster(scope)) 123 } 124 125 // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=awsclusters,verbs=get;list;watch;create;update;patch;delete 126 // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=awsclusters/status,verbs=get;update;patch 127 // +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=clusters;clusters/status,verbs=get;list;watch 128 // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=awsclusterroleidentities;awsclusterstaticidentities,verbs=get;list;watch 129 // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=awsclustercontrolleridentities,verbs=get;list;watch;create; 130 131 func (r *AWSClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Result, reterr error) { 132 log := ctrl.LoggerFrom(ctx) 133 134 // Fetch the AWSCluster instance 135 awsCluster := &infrav1.AWSCluster{} 136 err := r.Get(ctx, req.NamespacedName, awsCluster) 137 if err != nil { 138 if apierrors.IsNotFound(err) { 139 return reconcile.Result{}, nil 140 } 141 return reconcile.Result{}, err 142 } 143 144 // Fetch the Cluster. 145 cluster, err := util.GetOwnerCluster(ctx, r.Client, awsCluster.ObjectMeta) 146 if err != nil { 147 return reconcile.Result{}, err 148 } 149 150 if cluster == nil { 151 log.Info("Cluster Controller has not yet set OwnerRef") 152 return reconcile.Result{}, nil 153 } 154 155 if capiannotations.IsPaused(cluster, awsCluster) { 156 log.Info("AWSCluster or linked Cluster is marked as paused. Won't reconcile") 157 return reconcile.Result{}, nil 158 } 159 160 log = log.WithValues("cluster", cluster.Name) 161 helper, err := patch.NewHelper(awsCluster, r.Client) 162 if err != nil { 163 return reconcile.Result{}, errors.Wrap(err, "failed to init patch helper") 164 } 165 166 defer func() { 167 e := helper.Patch( 168 context.TODO(), 169 awsCluster, 170 patch.WithOwnedConditions{Conditions: []clusterv1.ConditionType{ 171 infrav1.PrincipalCredentialRetrievedCondition, 172 infrav1.PrincipalUsageAllowedCondition, 173 infrav1.LoadBalancerReadyCondition, 174 }}) 175 if e != nil { 176 fmt.Println(e.Error()) 177 } 178 }() 179 180 // Create the scope. 181 clusterScope, err := scope.NewClusterScope(scope.ClusterScopeParams{ 182 Client: r.Client, 183 Logger: &log, 184 Cluster: cluster, 185 AWSCluster: awsCluster, 186 ControllerName: "awscluster", 187 Endpoints: r.Endpoints, 188 }) 189 if err != nil { 190 return reconcile.Result{}, errors.Errorf("failed to create scope: %+v", err) 191 } 192 193 // Always close the scope when exiting this function so we can persist any AWSCluster changes. 194 defer func() { 195 if err := clusterScope.Close(); err != nil && reterr == nil { 196 reterr = err 197 } 198 }() 199 200 // Handle deleted clusters 201 if !awsCluster.DeletionTimestamp.IsZero() { 202 return r.reconcileDelete(ctx, clusterScope) 203 } 204 205 // Handle non-deleted clusters 206 return r.reconcileNormal(clusterScope) 207 } 208 209 func (r *AWSClusterReconciler) reconcileDelete(ctx context.Context, clusterScope *scope.ClusterScope) (reconcile.Result, error) { 210 clusterScope.Info("Reconciling AWSCluster delete") 211 212 ec2svc := r.getEC2Service(clusterScope) 213 elbsvc := r.getELBService(clusterScope) 214 networkSvc := r.getNetworkService(*clusterScope) 215 sgService := r.getSecurityGroupService(*clusterScope) 216 s3Service := s3.NewService(clusterScope) 217 218 if feature.Gates.Enabled(feature.EventBridgeInstanceState) { 219 instancestateSvc := instancestate.NewService(clusterScope) 220 if err := instancestateSvc.DeleteEC2Events(); err != nil { 221 // Not deleting the events isn't critical to cluster deletion 222 clusterScope.Error(err, "non-fatal: failed to delete EventBridge notifications") 223 } 224 } 225 226 if err := elbsvc.DeleteLoadbalancers(); err != nil { 227 clusterScope.Error(err, "error deleting load balancer") 228 return reconcile.Result{}, err 229 } 230 231 if err := ec2svc.DeleteBastion(); err != nil { 232 clusterScope.Error(err, "error deleting bastion") 233 return reconcile.Result{}, err 234 } 235 236 if err := sgService.DeleteSecurityGroups(); err != nil { 237 clusterScope.Error(err, "error deleting security groups") 238 return reconcile.Result{}, err 239 } 240 241 if r.ExternalResourceGC { 242 gcSvc := gc.NewService(clusterScope) 243 if gcErr := gcSvc.ReconcileDelete(ctx); gcErr != nil { 244 return reconcile.Result{}, fmt.Errorf("failed delete reconcile for gc service: %w", gcErr) 245 } 246 } 247 248 if err := networkSvc.DeleteNetwork(); err != nil { 249 clusterScope.Error(err, "error deleting network") 250 return reconcile.Result{}, err 251 } 252 253 if err := s3Service.DeleteBucket(); err != nil { 254 return reconcile.Result{}, errors.Wrapf(err, "error deleting S3 Bucket") 255 } 256 257 // Cluster is deleted so remove the finalizer. 258 controllerutil.RemoveFinalizer(clusterScope.AWSCluster, infrav1.ClusterFinalizer) 259 260 return reconcile.Result{}, nil 261 } 262 263 func (r *AWSClusterReconciler) reconcileNormal(clusterScope *scope.ClusterScope) (reconcile.Result, error) { 264 clusterScope.Info("Reconciling AWSCluster") 265 266 awsCluster := clusterScope.AWSCluster 267 268 // If the AWSCluster doesn't have our finalizer, add it. 269 controllerutil.AddFinalizer(awsCluster, infrav1.ClusterFinalizer) 270 // Register the finalizer immediately to avoid orphaning AWS resources on delete 271 if err := clusterScope.PatchObject(); err != nil { 272 return reconcile.Result{}, err 273 } 274 275 ec2Service := r.getEC2Service(clusterScope) 276 elbService := r.getELBService(clusterScope) 277 networkSvc := r.getNetworkService(*clusterScope) 278 sgService := r.getSecurityGroupService(*clusterScope) 279 s3Service := s3.NewService(clusterScope) 280 281 if err := networkSvc.ReconcileNetwork(); err != nil { 282 clusterScope.Error(err, "failed to reconcile network") 283 return reconcile.Result{}, err 284 } 285 286 // CNI related security groups gets deleted from the AWSClusters created prior to networkSpec.cni defaulting (5.5) after upgrading controllers. 287 // https://github.com/kubernetes-sigs/cluster-api-provider-aws/issues/2084 288 // TODO: Remove this after v1aplha4 289 clusterScope.AWSCluster.Default() 290 291 if err := sgService.ReconcileSecurityGroups(); err != nil { 292 clusterScope.Error(err, "failed to reconcile security groups") 293 conditions.MarkFalse(awsCluster, infrav1.ClusterSecurityGroupsReadyCondition, infrav1.ClusterSecurityGroupReconciliationFailedReason, infrautilconditions.ErrorConditionAfterInit(clusterScope.ClusterObj()), err.Error()) 294 return reconcile.Result{}, err 295 } 296 297 if err := ec2Service.ReconcileBastion(); err != nil { 298 conditions.MarkFalse(awsCluster, infrav1.BastionHostReadyCondition, infrav1.BastionHostFailedReason, infrautilconditions.ErrorConditionAfterInit(clusterScope.ClusterObj()), err.Error()) 299 clusterScope.Error(err, "failed to reconcile bastion host") 300 return reconcile.Result{}, err 301 } 302 303 if feature.Gates.Enabled(feature.EventBridgeInstanceState) { 304 instancestateSvc := instancestate.NewService(clusterScope) 305 if err := instancestateSvc.ReconcileEC2Events(); err != nil { 306 // non fatal error, so we continue 307 clusterScope.Error(err, "non-fatal: failed to set up EventBridge") 308 } 309 } 310 311 if err := elbService.ReconcileLoadbalancers(); err != nil { 312 clusterScope.Error(err, "failed to reconcile load balancer") 313 conditions.MarkFalse(awsCluster, infrav1.LoadBalancerReadyCondition, infrav1.LoadBalancerFailedReason, infrautilconditions.ErrorConditionAfterInit(clusterScope.ClusterObj()), err.Error()) 314 return reconcile.Result{}, err 315 } 316 317 if err := s3Service.ReconcileBucket(); err != nil { 318 conditions.MarkFalse(awsCluster, infrav1.S3BucketReadyCondition, infrav1.S3BucketFailedReason, clusterv1.ConditionSeverityError, err.Error()) 319 return reconcile.Result{}, errors.Wrapf(err, "failed to reconcile S3 Bucket for AWSCluster %s/%s", awsCluster.Namespace, awsCluster.Name) 320 } 321 322 if awsCluster.Status.Network.APIServerELB.DNSName == "" { 323 conditions.MarkFalse(awsCluster, infrav1.LoadBalancerReadyCondition, infrav1.WaitForDNSNameReason, clusterv1.ConditionSeverityInfo, "") 324 clusterScope.Info("Waiting on API server ELB DNS name") 325 return reconcile.Result{RequeueAfter: 15 * time.Second}, nil 326 } 327 328 if _, err := net.LookupIP(awsCluster.Status.Network.APIServerELB.DNSName); err != nil { 329 conditions.MarkFalse(awsCluster, infrav1.LoadBalancerReadyCondition, infrav1.WaitForDNSNameResolveReason, clusterv1.ConditionSeverityInfo, "") 330 clusterScope.Info("Waiting on API server ELB DNS name to resolve") 331 return reconcile.Result{RequeueAfter: 15 * time.Second}, nil // nolint:nilerr 332 } 333 conditions.MarkTrue(awsCluster, infrav1.LoadBalancerReadyCondition) 334 335 awsCluster.Spec.ControlPlaneEndpoint = clusterv1.APIEndpoint{ 336 Host: awsCluster.Status.Network.APIServerELB.DNSName, 337 Port: clusterScope.APIServerPort(), 338 } 339 340 for _, subnet := range clusterScope.Subnets().FilterPrivate() { 341 found := false 342 for _, az := range awsCluster.Status.Network.APIServerELB.AvailabilityZones { 343 if az == subnet.AvailabilityZone { 344 found = true 345 break 346 } 347 } 348 349 clusterScope.SetFailureDomain(subnet.AvailabilityZone, clusterv1.FailureDomainSpec{ 350 ControlPlane: found, 351 }) 352 } 353 354 awsCluster.Status.Ready = true 355 return reconcile.Result{}, nil 356 } 357 358 func (r *AWSClusterReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options controller.Options) error { 359 log := ctrl.LoggerFrom(ctx) 360 controller, err := ctrl.NewControllerManagedBy(mgr). 361 WithOptions(options). 362 For(&infrav1.AWSCluster{}). 363 WithEventFilter(predicates.ResourceNotPausedAndHasFilterLabel(log, r.WatchFilterValue)). 364 WithEventFilter( 365 predicate.Funcs{ 366 // Avoid reconciling if the event triggering the reconciliation is related to incremental status updates 367 // for AWSCluster resources only 368 UpdateFunc: func(e event.UpdateEvent) bool { 369 if e.ObjectOld.GetObjectKind().GroupVersionKind().Kind != "AWSCluster" { 370 return true 371 } 372 373 oldCluster := e.ObjectOld.(*infrav1.AWSCluster).DeepCopy() 374 newCluster := e.ObjectNew.(*infrav1.AWSCluster).DeepCopy() 375 376 oldCluster.Status = infrav1.AWSClusterStatus{} 377 newCluster.Status = infrav1.AWSClusterStatus{} 378 379 oldCluster.ObjectMeta.ResourceVersion = "" 380 newCluster.ObjectMeta.ResourceVersion = "" 381 382 return !cmp.Equal(oldCluster, newCluster) 383 }, 384 }, 385 ). 386 WithEventFilter(predicates.ResourceIsNotExternallyManaged(log)). 387 Build(r) 388 if err != nil { 389 return errors.Wrap(err, "error creating controller") 390 } 391 392 return controller.Watch( 393 &source.Kind{Type: &clusterv1.Cluster{}}, 394 handler.EnqueueRequestsFromMapFunc(r.requeueAWSClusterForUnpausedCluster(ctx, log)), 395 predicates.ClusterUnpaused(log), 396 ) 397 } 398 399 func (r *AWSClusterReconciler) requeueAWSClusterForUnpausedCluster(ctx context.Context, log logr.Logger) handler.MapFunc { 400 return func(o client.Object) []ctrl.Request { 401 c, ok := o.(*clusterv1.Cluster) 402 if !ok { 403 panic(fmt.Sprintf("Expected a Cluster but got a %T", o)) 404 } 405 406 log := log.WithValues("objectMapper", "clusterToAWSCluster", "namespace", c.Namespace, "cluster", c.Name) 407 408 // Don't handle deleted clusters 409 if !c.ObjectMeta.DeletionTimestamp.IsZero() { 410 log.V(4).Info("Cluster has a deletion timestamp, skipping mapping.") 411 return nil 412 } 413 414 // Make sure the ref is set 415 if c.Spec.InfrastructureRef == nil { 416 log.V(4).Info("Cluster does not have an InfrastructureRef, skipping mapping.") 417 return nil 418 } 419 420 if c.Spec.InfrastructureRef.GroupVersionKind().Kind != "AWSCluster" { 421 log.V(4).Info("Cluster has an InfrastructureRef for a different type, skipping mapping.") 422 return nil 423 } 424 425 awsCluster := &infrav1.AWSCluster{} 426 key := types.NamespacedName{Namespace: c.Spec.InfrastructureRef.Namespace, Name: c.Spec.InfrastructureRef.Name} 427 428 if err := r.Get(ctx, key, awsCluster); err != nil { 429 log.V(4).Error(err, "Failed to get AWS cluster") 430 return nil 431 } 432 433 if capiannotations.IsExternallyManaged(awsCluster) { 434 log.V(4).Info("AWSCluster is externally managed, skipping mapping.") 435 return nil 436 } 437 438 log.V(4).Info("Adding request.", "awsCluster", c.Spec.InfrastructureRef.Name) 439 return []ctrl.Request{ 440 { 441 NamespacedName: client.ObjectKey{Namespace: c.Namespace, Name: c.Spec.InfrastructureRef.Name}, 442 }, 443 } 444 } 445 }