sigs.k8s.io/cluster-api-provider-azure@v1.17.0/azure/services/aso/aso.go (about) 1 /* 2 Copyright 2023 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package aso 18 19 import ( 20 "context" 21 "encoding/json" 22 "fmt" 23 "time" 24 25 asoannotations "github.com/Azure/azure-service-operator/v2/pkg/common/annotations" 26 "github.com/Azure/azure-service-operator/v2/pkg/genruntime" 27 "github.com/Azure/azure-service-operator/v2/pkg/genruntime/conditions" 28 jsonpatch "github.com/evanphx/json-patch/v5" 29 "github.com/google/go-cmp/cmp" 30 "github.com/pkg/errors" 31 apierrors "k8s.io/apimachinery/pkg/api/errors" 32 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 33 "k8s.io/apimachinery/pkg/runtime" 34 "k8s.io/apimachinery/pkg/runtime/serializer" 35 "k8s.io/apimachinery/pkg/util/yaml" 36 infrav1 "sigs.k8s.io/cluster-api-provider-azure/api/v1beta1" 37 "sigs.k8s.io/cluster-api-provider-azure/azure" 38 "sigs.k8s.io/cluster-api-provider-azure/util/aso" 39 "sigs.k8s.io/cluster-api-provider-azure/util/tele" 40 clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" 41 "sigs.k8s.io/controller-runtime/pkg/client" 42 "sigs.k8s.io/controller-runtime/pkg/client/apiutil" 43 "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" 44 ) 45 46 const ( 47 // prePauseReconcilePolicyAnnotation is the annotation key for the value of 48 // asoannotations.ReconcilePolicy that was set before pausing. 49 prePauseReconcilePolicyAnnotation = "sigs.k8s.io/cluster-api-provider-azure-pre-pause-reconcile-policy" 50 51 requeueInterval = 20 * time.Second 52 53 createOrUpdateFutureType = "ASOCreateOrUpdate" 54 deleteFutureType = "ASODelete" 55 ) 56 57 // reconciler is an implementation of the Reconciler interface. It handles creation 58 // and deletion of resources using ASO. 59 type reconciler[T genruntime.MetaObject] struct { 60 client.Client 61 62 clusterName string 63 owner client.Object 64 } 65 66 // New creates a new ASO reconciler. 67 func New[T genruntime.MetaObject](ctrlClient client.Client, clusterName string, owner client.Object) Reconciler[T] { 68 return &reconciler[T]{ 69 Client: ctrlClient, 70 clusterName: clusterName, 71 owner: owner, 72 } 73 } 74 75 // CreateOrUpdateResource implements the logic for creating a new or updating an 76 // existing resource with ASO. 77 func (r *reconciler[T]) CreateOrUpdateResource(ctx context.Context, spec azure.ASOResourceSpecGetter[T], serviceName string) (T, error) { 78 ctx, log, done := tele.StartSpanWithLogger(ctx, "services.aso.CreateOrUpdateResource") 79 defer done() 80 81 resource := spec.ResourceRef() 82 resource.SetNamespace(r.owner.GetNamespace()) 83 resourceName := resource.GetName() 84 resourceNamespace := resource.GetNamespace() 85 86 log = log.WithValues("service", serviceName, "resource", resourceName, "namespace", resourceNamespace) 87 88 var readyErr error 89 var adopt bool 90 var existing T 91 var zero T // holds the zero value, to be returned with non-nil errors. 92 resourceExists := false 93 if err := r.Client.Get(ctx, client.ObjectKeyFromObject(resource), resource); err != nil { 94 if !apierrors.IsNotFound(err) { 95 return zero, errors.Wrapf(err, "failed to get existing resource %s/%s (service: %s)", resourceNamespace, resourceName, serviceName) 96 } 97 log.V(2).Info("existing resource not found, will create a new one") 98 } else { 99 existing = resource 100 resourceExists = true 101 log.V(2).Info("successfully got existing resource") 102 103 if isOwned, err := isOwnedBy(resource, r.owner, r.Scheme()); err != nil { 104 return zero, err 105 } else if !isOwned && !hasLegacyOwnedByLabel(resource.GetLabels(), r.clusterName) { 106 log.V(4).Info("skipping reconcile for unmanaged resource") 107 return existing, nil 108 } 109 110 // Check if there is an ongoing long running operation. 111 conds := existing.GetConditions() 112 i, readyExists := conds.FindIndexByType(conditions.ConditionTypeReady) 113 if !readyExists { 114 return zero, azure.WithTransientError(errors.New("ready status unknown"), requeueInterval) 115 } 116 if cond := conds[i]; cond.Status != metav1.ConditionTrue { 117 switch { 118 case cond.Reason == conditions.ReasonAzureResourceNotFound.Name && 119 existing.GetAnnotations()[asoannotations.ReconcilePolicy] == string(asoannotations.ReconcilePolicySkip): 120 // This resource was originally created by CAPZ and a 121 // corresponding Azure resource has been found not to exist, so 122 // CAPZ will tell ASO to adopt the resource by setting its 123 // reconcile policy to "manage". This extra step is necessary to 124 // handle user-managed resources that already exist in Azure and 125 // should not be reconciled by ASO while ensuring they're still 126 // represented in ASO. 127 log.V(2).Info("resource not found in Azure and \"skip\" reconcile-policy set, adopting") 128 // Don't set readyErr so the resource can be adopted with an 129 // update instead of returning early. 130 adopt = true 131 case cond.Reason == conditions.ReasonReconciling.Name: 132 readyErr = azure.NewOperationNotDoneError(&infrav1.Future{ 133 Type: createOrUpdateFutureType, 134 ResourceGroup: existing.GetNamespace(), 135 Name: existing.GetName(), 136 }) 137 default: 138 readyErr = fmt.Errorf("resource is not Ready: %s", conds[i].Message) 139 } 140 141 if readyErr != nil { 142 if conds[i].Severity == conditions.ConditionSeverityError { 143 readyErr = azure.WithTerminalError(readyErr) 144 } else { 145 readyErr = azure.WithTransientError(readyErr, requeueInterval) 146 } 147 } 148 } 149 } 150 151 // Construct parameters using the resource spec and information from the existing resource, if there is one. 152 var existingCopy T 153 if resourceExists { 154 existingCopy = existing.DeepCopyObject().(T) 155 } 156 parameters, err := PatchedParameters(ctx, r.Scheme(), spec, existingCopy) 157 if err != nil { 158 return zero, errors.Wrapf(err, "failed to get desired parameters for resource %s/%s (service: %s)", resourceNamespace, resourceName, serviceName) 159 } 160 161 parameters.SetName(resourceName) 162 parameters.SetNamespace(resourceNamespace) 163 164 if err := controllerutil.SetControllerReference(r.owner, parameters, r.Client.Scheme()); err != nil { 165 return zero, errors.Wrap(err, "failed to set owner ref") 166 } 167 168 if t, ok := spec.(TagsGetterSetter[T]); ok { 169 if err := reconcileTags(t, existing, resourceExists, parameters); err != nil { 170 return zero, errors.Wrap(err, "failed to reconcile tags") 171 } 172 } 173 174 labels := parameters.GetLabels() 175 if labels == nil { 176 labels = make(map[string]string) 177 } 178 labels[clusterv1.ClusterNameLabel] = r.clusterName 179 180 annotations := parameters.GetAnnotations() 181 if annotations == nil { 182 annotations = make(map[string]string) 183 } 184 185 if prevReconcilePolicy, ok := annotations[prePauseReconcilePolicyAnnotation]; ok { 186 annotations[asoannotations.ReconcilePolicy] = prevReconcilePolicy 187 delete(annotations, prePauseReconcilePolicyAnnotation) 188 } 189 if !resourceExists { 190 // Create the ASO resource with "skip" in case a matching resource 191 // already exists in Azure, in which case CAPZ will assume it is managed 192 // by the user and ASO should not actively reconcile changes to the ASO 193 // resource. In the canonical "entirely managed by CAPZ" case, the next 194 // reconciliation will reveal the resource does not already exist in 195 // Azure and the ASO resource will be adopted by changing this 196 // annotation to "manage". 197 annotations[asoannotations.ReconcilePolicy] = string(asoannotations.ReconcilePolicySkip) 198 } else { 199 adopt = adopt || spec.WasManaged(existing) 200 } 201 if adopt { 202 annotations[asoannotations.ReconcilePolicy] = string(asoannotations.ReconcilePolicyManage) 203 } 204 205 // Set the secret name annotation in order to leverage the ASO resource credential scope as defined in 206 // https://azure.github.io/azure-service-operator/guide/authentication/credential-scope/#resource-scope. 207 annotations[asoannotations.PerResourceSecret] = aso.GetASOSecretName(r.clusterName) 208 209 if len(labels) == 0 { 210 labels = nil 211 } 212 parameters.SetLabels(labels) 213 if len(annotations) == 0 { 214 annotations = nil 215 } 216 parameters.SetAnnotations(annotations) 217 218 diff := cmp.Diff(existing, parameters) 219 if diff == "" { 220 if readyErr != nil { 221 // Only return this error when the resource is up to date in order to permit updates from 222 // Parameters which may fix the resource's current state. 223 return zero, readyErr 224 } 225 log.V(2).Info("resource up to date") 226 return existing, nil 227 } 228 log.V(2).Info("creating or updating resource", "diff", diff) 229 return r.createOrUpdateResource(ctx, existing, parameters, resourceExists, serviceName) 230 } 231 232 // PatchedParameters returns the Parameters of spec with patches applied. 233 func PatchedParameters[T genruntime.MetaObject](ctx context.Context, scheme *runtime.Scheme, spec azure.ASOResourceSpecGetter[T], existing T) (T, error) { 234 var zero T // to be returned with non-nil errors 235 parameters, err := spec.Parameters(ctx, existing) 236 if err != nil { 237 return zero, err 238 } 239 return applyPatches(scheme, spec, parameters) 240 } 241 242 func applyPatches[T genruntime.MetaObject](scheme *runtime.Scheme, spec azure.ASOResourceSpecGetter[T], parameters T) (T, error) { 243 p, ok := spec.(Patcher) 244 if !ok { 245 return parameters, nil 246 } 247 248 var zero T // to be returned with non-nil errors 249 250 gvk, err := apiutil.GVKForObject(parameters, scheme) 251 if err != nil { 252 return zero, errors.Wrap(err, "failed to get GroupVersionKind for object") 253 } 254 255 parameters.GetObjectKind().SetGroupVersionKind(gvk) 256 paramData, err := json.Marshal(parameters) 257 if err != nil { 258 return zero, errors.Wrap(err, "failed to marshal JSON for patch") 259 } 260 261 for i, extraPatch := range p.ExtraPatches() { 262 jsonPatch, err := yaml.ToJSON([]byte(extraPatch)) 263 if err != nil { 264 return zero, errors.Wrapf(err, "failed to convert patch at index %d to JSON", i) 265 } 266 paramData, err = jsonpatch.MergePatch(paramData, jsonPatch) 267 if err != nil { 268 return zero, errors.Wrapf(err, "failed to apply patch at index %d", i) 269 } 270 } 271 272 decoder := serializer.NewCodecFactory(scheme).UniversalDeserializer() 273 obj, _, err := decoder.Decode(paramData, nil, nil) 274 if err != nil { 275 return zero, errors.Wrap(err, "failed to decode object") 276 } 277 278 t, ok := obj.(T) 279 if !ok { 280 return zero, fmt.Errorf("decoded patched object is %T, not %T", obj, parameters) 281 } 282 283 return t, nil 284 } 285 286 func (r *reconciler[T]) createOrUpdateResource(ctx context.Context, existing T, parameters client.Object, resourceExists bool, serviceName string) (T, error) { 287 var zero T 288 var err error 289 var logMessageVerbPrefix string 290 if resourceExists { 291 logMessageVerbPrefix = "updat" 292 err = r.Client.Patch(ctx, parameters, client.MergeFrom(existing)) 293 } else { 294 logMessageVerbPrefix = "creat" 295 err = r.Client.Create(ctx, parameters) 296 } 297 if err == nil { 298 // Resources need to be requeued to wait for the create or update to finish. 299 return zero, azure.WithTransientError(azure.NewOperationNotDoneError(&infrav1.Future{ 300 Type: createOrUpdateFutureType, 301 ResourceGroup: parameters.GetNamespace(), 302 Name: parameters.GetName(), 303 }), requeueInterval) 304 } 305 return zero, errors.Wrapf(err, fmt.Sprintf("failed to %se resource %s/%s (service: %s)", logMessageVerbPrefix, parameters.GetNamespace(), parameters.GetName(), serviceName)) 306 } 307 308 // DeleteResource implements the logic for deleting a resource Asynchronously. 309 func (r *reconciler[T]) DeleteResource(ctx context.Context, resource T, serviceName string) (err error) { 310 ctx, log, done := tele.StartSpanWithLogger(ctx, "services.aso.DeleteResource") 311 defer done() 312 313 resource.SetNamespace(r.owner.GetNamespace()) 314 resourceName := resource.GetName() 315 resourceNamespace := resource.GetNamespace() 316 317 log = log.WithValues("service", serviceName, "resource", resourceName, "namespace", resourceNamespace) 318 319 managed, err := IsManaged(ctx, r.Client, resource, r.owner) 320 if apierrors.IsNotFound(err) { 321 // already deleted 322 log.V(2).Info("successfully deleted resource") 323 return nil 324 } 325 if err != nil { 326 return errors.Wrap(err, "failed to determine if resource is managed") 327 } 328 if !managed { 329 log.V(4).Info("skipping delete for unmanaged resource") 330 return nil 331 } 332 333 log.V(2).Info("deleting resource") 334 err = r.Client.Delete(ctx, resource) 335 if err != nil { 336 if apierrors.IsNotFound(err) { 337 // already deleted 338 log.V(2).Info("successfully deleted resource") 339 return nil 340 } 341 return errors.Wrapf(err, "failed to delete resource %s/%s (service: %s)", resourceNamespace, resourceName, serviceName) 342 } 343 344 return azure.WithTransientError(azure.NewOperationNotDoneError(&infrav1.Future{ 345 Type: deleteFutureType, 346 ResourceGroup: resourceNamespace, 347 Name: resourceName, 348 }), requeueInterval) 349 } 350 351 // IsManaged returns whether the ASO resource referred to by spec was created by 352 // CAPZ and therefore whether CAPZ should manage its lifecycle. 353 func IsManaged[T genruntime.MetaObject](ctx context.Context, ctrlClient client.Client, resource T, owner client.Object) (bool, error) { 354 ctx, _, done := tele.StartSpanWithLogger(ctx, "services.aso.IsManaged") 355 defer done() 356 357 resource.SetNamespace(owner.GetNamespace()) 358 359 err := ctrlClient.Get(ctx, client.ObjectKeyFromObject(resource), resource) 360 if err != nil { 361 return false, errors.Wrap(err, "error getting resource") 362 } 363 364 return isOwnedBy(resource, owner, ctrlClient.Scheme()) 365 } 366 367 func isOwnedBy(resource client.Object, owner client.Object, scheme *runtime.Scheme) (bool, error) { 368 ownerGVK, err := apiutil.GVKForObject(owner, scheme) 369 if err != nil { 370 return false, err 371 } 372 existingOwner := metav1.GetControllerOf(resource) 373 return existingOwner != nil && 374 existingOwner.APIVersion == ownerGVK.GroupVersion().String() && 375 existingOwner.Kind == ownerGVK.Kind && 376 existingOwner.Name == owner.GetName(), nil 377 } 378 379 func hasLegacyOwnedByLabel(labels map[string]string, clusterName string) bool { 380 //nolint:staticcheck // Referencing this deprecated value is required for backwards compatibility. 381 return labels[infrav1.OwnedByClusterLabelKey] == clusterName 382 } 383 384 // PauseResource pauses an ASO resource by updating its `reconcile-policy` to `skip`. 385 func (r *reconciler[T]) PauseResource(ctx context.Context, resource T, serviceName string) error { 386 ctx, log, done := tele.StartSpanWithLogger(ctx, "services.aso.PauseResource") 387 defer done() 388 389 resource.SetNamespace(r.owner.GetNamespace()) 390 391 log = log.WithValues("service", serviceName, "resource", resource.GetName(), "namespace", resource.GetNamespace()) 392 393 if err := r.Client.Get(ctx, client.ObjectKeyFromObject(resource), resource); err != nil { 394 return err 395 } 396 if isOwned, err := isOwnedBy(resource, r.owner, r.Scheme()); err != nil { 397 return err 398 } else if !isOwned { 399 log.V(4).Info("Skipping pause of unmanaged resource") 400 return nil 401 } 402 403 annotations := resource.GetAnnotations() 404 if _, exists := annotations[prePauseReconcilePolicyAnnotation]; exists { 405 log.V(4).Info("resource is already paused") 406 return nil 407 } 408 409 log.V(4).Info("Pausing resource") 410 before := resource.DeepCopyObject().(genruntime.MetaObject) 411 412 if annotations == nil { 413 annotations = make(map[string]string, 2) 414 } 415 annotations[prePauseReconcilePolicyAnnotation] = annotations[asoannotations.ReconcilePolicy] 416 annotations[asoannotations.ReconcilePolicy] = string(asoannotations.ReconcilePolicySkip) 417 resource.SetAnnotations(annotations) 418 419 return r.Client.Patch(ctx, resource, client.MergeFrom(before)) 420 }