// sigs.k8s.io/cluster-api-provider-azure@v1.14.3/azure/services/aso/aso.go

/*
Copyright 2023 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package aso

import (
	"context"
	"encoding/json"
	"fmt"
	"time"

	asoannotations "github.com/Azure/azure-service-operator/v2/pkg/common/annotations"
	"github.com/Azure/azure-service-operator/v2/pkg/genruntime"
	"github.com/Azure/azure-service-operator/v2/pkg/genruntime/conditions"
	jsonpatch "github.com/evanphx/json-patch/v5"
	"github.com/google/go-cmp/cmp"
	"github.com/pkg/errors"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/runtime/schema"
	"k8s.io/apimachinery/pkg/runtime/serializer"
	"k8s.io/apimachinery/pkg/util/yaml"
	infrav1 "sigs.k8s.io/cluster-api-provider-azure/api/v1beta1"
	"sigs.k8s.io/cluster-api-provider-azure/azure"
	"sigs.k8s.io/cluster-api-provider-azure/util/aso"
	"sigs.k8s.io/cluster-api-provider-azure/util/tele"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/client/apiutil"
	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
)

const (
	// prePauseReconcilePolicyAnnotation is the annotation key for the value of
	// asoannotations.ReconcilePolicy that was set before pausing.
	prePauseReconcilePolicyAnnotation = "sigs.k8s.io/cluster-api-provider-azure-pre-pause-reconcile-policy"

	// requeueInterval is the interval passed to azure.WithTransientError for
	// every transient error produced in this file.
	requeueInterval = 20 * time.Second

	// createOrUpdateFutureType and deleteFutureType are the Type values set on
	// the infrav1.Future wrapped by azure.NewOperationNotDoneError for
	// create/update and delete operations respectively.
	createOrUpdateFutureType = "ASOCreateOrUpdate"
	deleteFutureType         = "ASODelete"
)

// reconciler is an implementation of the Reconciler interface. It handles creation
// and deletion of resources using ASO.
type reconciler[T genruntime.MetaObject] struct {
	// Client is the embedded Kubernetes client used to get, create, patch and
	// delete ASO resources; its Scheme() is also used for ownership checks.
	client.Client

	// clusterName is compared against the legacy owned-by label and is used to
	// derive the per-resource ASO secret name.
	clusterName string

	// owner supplies the namespace for every resource handled by the
	// reconciler and is set as the controller reference on created/updated
	// resources.
	owner client.Object
}

// New creates a new ASO reconciler.
//
// ctrlClient is used for all API calls, clusterName identifies the cluster for
// the legacy ownership label and the ASO credential secret name, and owner is
// the object whose namespace and identity are stamped onto managed resources.
func New[T genruntime.MetaObject](ctrlClient client.Client, clusterName string, owner client.Object) Reconciler[T] {
	return &reconciler[T]{
		Client:      ctrlClient,
		clusterName: clusterName,
		owner:       owner,
	}
}

// CreateOrUpdateResource implements the logic for creating a new or updating an
// existing resource with ASO.
//
// The resource named by spec.ResourceRef() is looked up in the owner's
// namespace. The possible outcomes are:
//   - the resource exists but is neither controlled by the owner nor carries
//     the legacy owned-by label: it is returned unchanged with a nil error;
//   - the resource exists, matches the desired parameters and is Ready: it is
//     returned with a nil error;
//   - the resource exists, matches the desired parameters but is not Ready:
//     the zero value is returned with a terminal or transient error depending
//     on the Ready condition's severity;
//   - the resource is missing or differs from the desired parameters: it is
//     created or patched via createOrUpdateResource, which always returns the
//     zero value together with an error (an "operation not done" transient
//     error on success).
//
// serviceName is only used for logging and error messages.
func (r *reconciler[T]) CreateOrUpdateResource(ctx context.Context, spec azure.ASOResourceSpecGetter[T], serviceName string) (T, error) {
	ctx, log, done := tele.StartSpanWithLogger(ctx, "services.aso.CreateOrUpdateResource")
	defer done()

	// The resource always lives in the owner's namespace, regardless of what
	// the spec's reference carries.
	resource := spec.ResourceRef()
	resource.SetNamespace(r.owner.GetNamespace())
	resourceName := resource.GetName()
	resourceNamespace := resource.GetNamespace()

	log = log.WithValues("service", serviceName, "resource", resourceName, "namespace", resourceNamespace)

	// readyErr is deliberately not returned right away: it is only surfaced
	// further down once the resource is known to be up to date.
	var readyErr error
	var adopt bool
	var existing T
	var zero T // holds the zero value, to be returned with non-nil errors.
	resourceExists := false
	if err := r.Client.Get(ctx, client.ObjectKeyFromObject(resource), resource); err != nil {
		// Only NotFound is tolerated; it means the resource must be created.
		if !apierrors.IsNotFound(err) {
			return zero, errors.Wrapf(err, "failed to get existing resource %s/%s (service: %s)", resourceNamespace, resourceName, serviceName)
		}
		log.V(2).Info("existing resource not found, will create a new one")
	} else {
		// Get filled resource in place, so existing refers to the same object.
		existing = resource
		resourceExists = true
		log.V(2).Info("successfully got existing resource")

		// A resource counts as managed if the owner is its controller or if it
		// still carries the legacy owned-by label for this cluster.
		if isOwned, err := isOwnedBy(resource, r.owner, r.Scheme()); err != nil {
			return zero, err
		} else if !isOwned && !hasLegacyOwnedByLabel(resource.GetLabels(), r.clusterName) {
			log.V(4).Info("skipping reconcile for unmanaged resource")
			return existing, nil
		}

		// Check if there is an ongoing long running operation.
		conds := existing.GetConditions()
		i, readyExists := conds.FindIndexByType(conditions.ConditionTypeReady)
		if !readyExists {
			// No Ready condition has been reported yet; retry later.
			return zero, azure.WithTransientError(errors.New("ready status unknown"), requeueInterval)
		}
		if cond := conds[i]; cond.Status != metav1.ConditionTrue {
			switch {
			case cond.Reason == conditions.ReasonAzureResourceNotFound.Name &&
				existing.GetAnnotations()[asoannotations.ReconcilePolicy] == string(asoannotations.ReconcilePolicySkip):
				// This resource was originally created by CAPZ and a
				// corresponding Azure resource has been found not to exist, so
				// CAPZ will tell ASO to adopt the resource by setting its
				// reconcile policy to "manage". This extra step is necessary to
				// handle user-managed resources that already exist in Azure and
				// should not be reconciled by ASO while ensuring they're still
				// represented in ASO.
				log.V(2).Info("resource not found in Azure and \"skip\" reconcile-policy set, adopting")
				// Don't set readyErr so the resource can be adopted with an
				// update instead of returning early.
				adopt = true
			case cond.Reason == conditions.ReasonReconciling.Name:
				// ASO is still working on the resource.
				readyErr = azure.NewOperationNotDoneError(&infrav1.Future{
					Type:          createOrUpdateFutureType,
					ResourceGroup: existing.GetNamespace(),
					Name:          existing.GetName(),
				})
			default:
				readyErr = fmt.Errorf("resource is not Ready: %s", conds[i].Message)
			}

			// Classify the error by the condition's severity: Error severity
			// is terminal, anything else is retried after requeueInterval.
			if readyErr != nil {
				if conds[i].Severity == conditions.ConditionSeverityError {
					readyErr = azure.WithTerminalError(readyErr)
				} else {
					readyErr = azure.WithTransientError(readyErr, requeueInterval)
				}
			}
		}
	}

	// Construct parameters using the resource spec and information from the existing resource, if there is one.
	// A deep copy is handed over (the zero value when nothing exists);
	// presumably so that spec.Parameters cannot modify existing, which is
	// still needed below for the diff and as the patch base.
	var existingCopy T
	if resourceExists {
		existingCopy = existing.DeepCopyObject().(T)
	}
	parameters, err := PatchedParameters(ctx, r.Scheme(), spec, existingCopy)
	if err != nil {
		return zero, errors.Wrapf(err, "failed to get desired parameters for resource %s/%s (service: %s)", resourceNamespace, resourceName, serviceName)
	}

	// Force the identity computed above onto the desired object.
	parameters.SetName(resourceName)
	parameters.SetNamespace(resourceNamespace)

	if err := controllerutil.SetControllerReference(r.owner, parameters, r.Client.Scheme()); err != nil {
		return zero, errors.Wrap(err, "failed to set owner ref")
	}

	// Tag reconciliation only applies to specs that implement TagsGetterSetter.
	if t, ok := spec.(TagsGetterSetter[T]); ok {
		if err := reconcileTags(t, existing, resourceExists, parameters); err != nil {
			return zero, errors.Wrap(err, "failed to reconcile tags")
		}
	}

	// Work on non-nil maps so entries can be written unconditionally below.
	labels := parameters.GetLabels()
	if labels == nil {
		labels = make(map[string]string)
	}
	annotations := parameters.GetAnnotations()
	if annotations == nil {
		annotations = make(map[string]string)
	}

	// If the desired object still carries the pre-pause annotation, restore
	// the reconcile policy saved in it and drop the marker (i.e. unpause).
	if prevReconcilePolicy, ok := annotations[prePauseReconcilePolicyAnnotation]; ok {
		annotations[asoannotations.ReconcilePolicy] = prevReconcilePolicy
		delete(annotations, prePauseReconcilePolicyAnnotation)
	}
	if !resourceExists {
		// Create the ASO resource with "skip" in case a matching resource
		// already exists in Azure, in which case CAPZ will assume it is managed
		// by the user and ASO should not actively reconcile changes to the ASO
		// resource. In the canonical "entirely managed by CAPZ" case, the next
		// reconciliation will reveal the resource does not already exist in
		// Azure and the ASO resource will be adopted by changing this
		// annotation to "manage".
		annotations[asoannotations.ReconcilePolicy] = string(asoannotations.ReconcilePolicySkip)
	} else {
		// Also adopt when the spec reports the existing resource as having
		// been managed.
		adopt = adopt || spec.WasManaged(existing)
	}
	if adopt {
		annotations[asoannotations.ReconcilePolicy] = string(asoannotations.ReconcilePolicyManage)
	}

	// Set the secret name annotation in order to leverage the ASO resource credential scope as defined in
	// https://azure.github.io/azure-service-operator/guide/authentication/credential-scope/#resource-scope.
	annotations[asoannotations.PerResourceSecret] = aso.GetASOSecretName(r.clusterName)

	// Normalize empty maps back to nil before writing them to the object, so
	// the maps allocated above do not by themselves show up in the diff.
	if len(labels) == 0 {
		labels = nil
	}
	parameters.SetLabels(labels)
	if len(annotations) == 0 {
		annotations = nil
	}
	parameters.SetAnnotations(annotations)

	// An empty diff means nothing needs to be written.
	diff := cmp.Diff(existing, parameters)
	if diff == "" {
		if readyErr != nil {
			// Only return this error when the resource is up to date in order to permit updates from
			// Parameters which may fix the resource's current state.
			return zero, readyErr
		}
		log.V(2).Info("resource up to date")
		return existing, nil
	}
	log.V(2).Info("creating or updating resource", "diff", diff)
	return r.createOrUpdateResource(ctx, existing, parameters, resourceExists, serviceName)
}

// PatchedParameters returns the Parameters of spec with patches applied.
231 func PatchedParameters[T genruntime.MetaObject](ctx context.Context, scheme *runtime.Scheme, spec azure.ASOResourceSpecGetter[T], existing T) (T, error) { 232 var zero T // to be returned with non-nil errors 233 parameters, err := spec.Parameters(ctx, existing) 234 if err != nil { 235 return zero, err 236 } 237 return applyPatches(scheme, spec, parameters) 238 } 239 240 func applyPatches[T genruntime.MetaObject](scheme *runtime.Scheme, spec azure.ASOResourceSpecGetter[T], parameters T) (T, error) { 241 p, ok := spec.(Patcher) 242 if !ok { 243 return parameters, nil 244 } 245 246 var zero T // to be returned with non-nil errors 247 248 gvk, err := apiutil.GVKForObject(parameters, scheme) 249 if err != nil { 250 return zero, errors.Wrap(err, "failed to get GroupVersionKind for object") 251 } 252 253 (genruntime.MetaObject)(parameters).(interface{ SetGroupVersionKind(schema.GroupVersionKind) }).SetGroupVersionKind(gvk) 254 paramData, err := json.Marshal(parameters) 255 if err != nil { 256 return zero, errors.Wrap(err, "failed to marshal JSON for patch") 257 } 258 259 for i, extraPatch := range p.ExtraPatches() { 260 jsonPatch, err := yaml.ToJSON([]byte(extraPatch)) 261 if err != nil { 262 return zero, errors.Wrapf(err, "failed to convert patch at index %d to JSON", i) 263 } 264 paramData, err = jsonpatch.MergePatch(paramData, jsonPatch) 265 if err != nil { 266 return zero, errors.Wrapf(err, "failed to apply patch at index %d", i) 267 } 268 } 269 270 decoder := serializer.NewCodecFactory(scheme).UniversalDeserializer() 271 obj, _, err := decoder.Decode(paramData, nil, nil) 272 if err != nil { 273 return zero, errors.Wrap(err, "failed to decode object") 274 } 275 276 t, ok := obj.(T) 277 if !ok { 278 return zero, fmt.Errorf("decoded patched object is %T, not %T", obj, parameters) 279 } 280 281 return t, nil 282 } 283 284 func (r *reconciler[T]) createOrUpdateResource(ctx context.Context, existing T, parameters client.Object, resourceExists bool, serviceName string) 
(T, error) { 285 var zero T 286 var err error 287 var logMessageVerbPrefix string 288 if resourceExists { 289 logMessageVerbPrefix = "updat" 290 err = r.Client.Patch(ctx, parameters, client.MergeFrom(existing)) 291 } else { 292 logMessageVerbPrefix = "creat" 293 err = r.Client.Create(ctx, parameters) 294 } 295 if err == nil { 296 // Resources need to be requeued to wait for the create or update to finish. 297 return zero, azure.WithTransientError(azure.NewOperationNotDoneError(&infrav1.Future{ 298 Type: createOrUpdateFutureType, 299 ResourceGroup: parameters.GetNamespace(), 300 Name: parameters.GetName(), 301 }), requeueInterval) 302 } 303 return zero, errors.Wrapf(err, fmt.Sprintf("failed to %se resource %s/%s (service: %s)", logMessageVerbPrefix, parameters.GetNamespace(), parameters.GetName(), serviceName)) 304 } 305 306 // DeleteResource implements the logic for deleting a resource Asynchronously. 307 func (r *reconciler[T]) DeleteResource(ctx context.Context, resource T, serviceName string) (err error) { 308 ctx, log, done := tele.StartSpanWithLogger(ctx, "services.aso.DeleteResource") 309 defer done() 310 311 resource.SetNamespace(r.owner.GetNamespace()) 312 resourceName := resource.GetName() 313 resourceNamespace := resource.GetNamespace() 314 315 log = log.WithValues("service", serviceName, "resource", resourceName, "namespace", resourceNamespace) 316 317 managed, err := IsManaged(ctx, r.Client, resource, r.owner) 318 if apierrors.IsNotFound(err) { 319 // already deleted 320 log.V(2).Info("successfully deleted resource") 321 return nil 322 } 323 if err != nil { 324 return errors.Wrap(err, "failed to determine if resource is managed") 325 } 326 if !managed { 327 log.V(4).Info("skipping delete for unmanaged resource") 328 return nil 329 } 330 331 log.V(2).Info("deleting resource") 332 err = r.Client.Delete(ctx, resource) 333 if err != nil { 334 if apierrors.IsNotFound(err) { 335 // already deleted 336 log.V(2).Info("successfully deleted resource") 337 return 
nil 338 } 339 return errors.Wrapf(err, "failed to delete resource %s/%s (service: %s)", resourceNamespace, resourceName, serviceName) 340 } 341 342 return azure.WithTransientError(azure.NewOperationNotDoneError(&infrav1.Future{ 343 Type: deleteFutureType, 344 ResourceGroup: resourceNamespace, 345 Name: resourceName, 346 }), requeueInterval) 347 } 348 349 // IsManaged returns whether the ASO resource referred to by spec was created by 350 // CAPZ and therefore whether CAPZ should manage its lifecycle. 351 func IsManaged[T genruntime.MetaObject](ctx context.Context, ctrlClient client.Client, resource T, owner client.Object) (bool, error) { 352 ctx, _, done := tele.StartSpanWithLogger(ctx, "services.aso.IsManaged") 353 defer done() 354 355 resource.SetNamespace(owner.GetNamespace()) 356 357 err := ctrlClient.Get(ctx, client.ObjectKeyFromObject(resource), resource) 358 if err != nil { 359 return false, errors.Wrap(err, "error getting resource") 360 } 361 362 return isOwnedBy(resource, owner, ctrlClient.Scheme()) 363 } 364 365 func isOwnedBy(resource client.Object, owner client.Object, scheme *runtime.Scheme) (bool, error) { 366 ownerGVK, err := apiutil.GVKForObject(owner, scheme) 367 if err != nil { 368 return false, err 369 } 370 existingOwner := metav1.GetControllerOf(resource) 371 return existingOwner != nil && 372 existingOwner.APIVersion == ownerGVK.GroupVersion().String() && 373 existingOwner.Kind == ownerGVK.Kind && 374 existingOwner.Name == owner.GetName(), nil 375 } 376 377 func hasLegacyOwnedByLabel(labels map[string]string, clusterName string) bool { 378 //nolint:staticcheck // Referencing this deprecated value is required for backwards compatibility. 379 return labels[infrav1.OwnedByClusterLabelKey] == clusterName 380 } 381 382 // PauseResource pauses an ASO resource by updating its `reconcile-policy` to `skip`. 
383 func (r *reconciler[T]) PauseResource(ctx context.Context, resource T, serviceName string) error { 384 ctx, log, done := tele.StartSpanWithLogger(ctx, "services.aso.PauseResource") 385 defer done() 386 387 resource.SetNamespace(r.owner.GetNamespace()) 388 389 log = log.WithValues("service", serviceName, "resource", resource.GetName(), "namespace", resource.GetNamespace()) 390 391 if err := r.Client.Get(ctx, client.ObjectKeyFromObject(resource), resource); err != nil { 392 return err 393 } 394 if isOwned, err := isOwnedBy(resource, r.owner, r.Scheme()); err != nil { 395 return err 396 } else if !isOwned { 397 log.V(4).Info("Skipping pause of unmanaged resource") 398 return nil 399 } 400 401 annotations := resource.GetAnnotations() 402 if _, exists := annotations[prePauseReconcilePolicyAnnotation]; exists { 403 log.V(4).Info("resource is already paused") 404 return nil 405 } 406 407 log.V(4).Info("Pausing resource") 408 before := resource.DeepCopyObject().(genruntime.MetaObject) 409 410 if annotations == nil { 411 annotations = make(map[string]string, 2) 412 } 413 annotations[prePauseReconcilePolicyAnnotation] = annotations[asoannotations.ReconcilePolicy] 414 annotations[asoannotations.ReconcilePolicy] = string(asoannotations.ReconcilePolicySkip) 415 resource.SetAnnotations(annotations) 416 417 return r.Client.Patch(ctx, resource, client.MergeFrom(before)) 418 }