sigs.k8s.io/cluster-api-provider-azure@v1.14.3/azure/services/aso/aso.go (about)

     1  /*
     2  Copyright 2023 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package aso
    18  
    19  import (
    20  	"context"
    21  	"encoding/json"
    22  	"fmt"
    23  	"time"
    24  
    25  	asoannotations "github.com/Azure/azure-service-operator/v2/pkg/common/annotations"
    26  	"github.com/Azure/azure-service-operator/v2/pkg/genruntime"
    27  	"github.com/Azure/azure-service-operator/v2/pkg/genruntime/conditions"
    28  	jsonpatch "github.com/evanphx/json-patch/v5"
    29  	"github.com/google/go-cmp/cmp"
    30  	"github.com/pkg/errors"
    31  	apierrors "k8s.io/apimachinery/pkg/api/errors"
    32  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    33  	"k8s.io/apimachinery/pkg/runtime"
    34  	"k8s.io/apimachinery/pkg/runtime/schema"
    35  	"k8s.io/apimachinery/pkg/runtime/serializer"
    36  	"k8s.io/apimachinery/pkg/util/yaml"
    37  	infrav1 "sigs.k8s.io/cluster-api-provider-azure/api/v1beta1"
    38  	"sigs.k8s.io/cluster-api-provider-azure/azure"
    39  	"sigs.k8s.io/cluster-api-provider-azure/util/aso"
    40  	"sigs.k8s.io/cluster-api-provider-azure/util/tele"
    41  	"sigs.k8s.io/controller-runtime/pkg/client"
    42  	"sigs.k8s.io/controller-runtime/pkg/client/apiutil"
    43  	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
    44  )
    45  
const (
	// prePauseReconcilePolicyAnnotation is the annotation key for the value of
	// asoannotations.ReconcilePolicy that was set before pausing.
	prePauseReconcilePolicyAnnotation = "sigs.k8s.io/cluster-api-provider-azure-pre-pause-reconcile-policy"

	// requeueInterval is the retry delay attached to the transient errors
	// returned by this package.
	requeueInterval = 20 * time.Second

	// createOrUpdateFutureType and deleteFutureType are the Type values of the
	// infrav1.Future carried by the operation-not-done errors that are returned
	// while a create/update or a delete has not yet completed.
	createOrUpdateFutureType = "ASOCreateOrUpdate"
	deleteFutureType         = "ASODelete"
)
    56  
// reconciler is an implementation of the Reconciler interface. It handles creation
// and deletion of resources using ASO.
type reconciler[T genruntime.MetaObject] struct {
	// Client is the embedded controller-runtime client through which ASO
	// resources are read, created, patched, and deleted.
	client.Client

	// clusterName is compared against the legacy owned-by-cluster label and is
	// used to derive the name of the ASO credentials secret. owner supplies the
	// namespace for reconciled resources and is set as their controller
	// reference.
	clusterName string
	owner       client.Object
}
    65  
    66  // New creates a new ASO reconciler.
    67  func New[T genruntime.MetaObject](ctrlClient client.Client, clusterName string, owner client.Object) Reconciler[T] {
    68  	return &reconciler[T]{
    69  		Client:      ctrlClient,
    70  		clusterName: clusterName,
    71  		owner:       owner,
    72  	}
    73  }
    74  
// CreateOrUpdateResource implements the logic for creating a new or updating an
// existing resource with ASO.
//
// The resource referenced by spec is looked up in the owner's namespace. When
// it exists but is neither controlled by the owner nor carries the legacy
// owned-by-cluster label, it is returned unchanged. Otherwise the desired
// parameters are computed from spec (with patches applied), the controller
// reference, tags, and ASO annotations are set on them, and the result is
// diffed against the existing resource:
//   - With no difference, the existing resource is returned, or the error
//     derived from its Ready condition when one was recorded.
//   - With a difference, the resource is created or patched through
//     createOrUpdateResource, which always returns an error (an
//     operation-not-done transient error on success).
//
// The zero value of T is returned alongside every non-nil error.
func (r *reconciler[T]) CreateOrUpdateResource(ctx context.Context, spec azure.ASOResourceSpecGetter[T], serviceName string) (T, error) {
	ctx, log, done := tele.StartSpanWithLogger(ctx, "services.aso.CreateOrUpdateResource")
	defer done()

	// The resource is always looked up and written in the owner's namespace.
	resource := spec.ResourceRef()
	resource.SetNamespace(r.owner.GetNamespace())
	resourceName := resource.GetName()
	resourceNamespace := resource.GetNamespace()

	log = log.WithValues("service", serviceName, "resource", resourceName, "namespace", resourceNamespace)

	var readyErr error
	var adopt bool
	var existing T
	var zero T // holds the zero value, to be returned with non-nil errors.
	resourceExists := false
	if err := r.Client.Get(ctx, client.ObjectKeyFromObject(resource), resource); err != nil {
		if !apierrors.IsNotFound(err) {
			return zero, errors.Wrapf(err, "failed to get existing resource %s/%s (service: %s)", resourceNamespace, resourceName, serviceName)
		}
		log.V(2).Info("existing resource not found, will create a new one")
	} else {
		existing = resource
		resourceExists = true
		log.V(2).Info("successfully got existing resource")

		// Leave resources alone unless they are controlled by the owner or
		// carry the legacy owned-by-cluster label for this cluster.
		if isOwned, err := isOwnedBy(resource, r.owner, r.Scheme()); err != nil {
			return zero, err
		} else if !isOwned && !hasLegacyOwnedByLabel(resource.GetLabels(), r.clusterName) {
			log.V(4).Info("skipping reconcile for unmanaged resource")
			return existing, nil
		}

		// Check if there is an ongoing long running operation.
		conds := existing.GetConditions()
		i, readyExists := conds.FindIndexByType(conditions.ConditionTypeReady)
		if !readyExists {
			// No Ready condition has been reported yet, so retry later.
			return zero, azure.WithTransientError(errors.New("ready status unknown"), requeueInterval)
		}
		if cond := conds[i]; cond.Status != metav1.ConditionTrue {
			switch {
			case cond.Reason == conditions.ReasonAzureResourceNotFound.Name &&
				existing.GetAnnotations()[asoannotations.ReconcilePolicy] == string(asoannotations.ReconcilePolicySkip):
				// This resource was originally created by CAPZ and a
				// corresponding Azure resource has been found not to exist, so
				// CAPZ will tell ASO to adopt the resource by setting its
				// reconcile policy to "manage". This extra step is necessary to
				// handle user-managed resources that already exist in Azure and
				// should not be reconciled by ASO while ensuring they're still
				// represented in ASO.
				log.V(2).Info("resource not found in Azure and \"skip\" reconcile-policy set, adopting")
				// Don't set readyErr so the resource can be adopted with an
				// update instead of returning early.
				adopt = true
			case cond.Reason == conditions.ReasonReconciling.Name:
				readyErr = azure.NewOperationNotDoneError(&infrav1.Future{
					Type:          createOrUpdateFutureType,
					ResourceGroup: existing.GetNamespace(),
					Name:          existing.GetName(),
				})
			default:
				readyErr = fmt.Errorf("resource is not Ready: %s", conds[i].Message)
			}

			// A condition with Error severity is reported as a terminal error;
			// any other severity is reported as transient and retried after
			// requeueInterval.
			if readyErr != nil {
				if conds[i].Severity == conditions.ConditionSeverityError {
					readyErr = azure.WithTerminalError(readyErr)
				} else {
					readyErr = azure.WithTransientError(readyErr, requeueInterval)
				}
			}
		}
	}

	// Construct parameters using the resource spec and information from the existing resource, if there is one.
	// NOTE(review): a deep copy is passed, presumably so that computing the
	// parameters cannot mutate existing, which is diffed against below.
	var existingCopy T
	if resourceExists {
		existingCopy = existing.DeepCopyObject().(T)
	}
	parameters, err := PatchedParameters(ctx, r.Scheme(), spec, existingCopy)
	if err != nil {
		return zero, errors.Wrapf(err, "failed to get desired parameters for resource %s/%s (service: %s)", resourceNamespace, resourceName, serviceName)
	}

	parameters.SetName(resourceName)
	parameters.SetNamespace(resourceNamespace)

	if err := controllerutil.SetControllerReference(r.owner, parameters, r.Client.Scheme()); err != nil {
		return zero, errors.Wrap(err, "failed to set owner ref")
	}

	// Tags are only reconciled for specs that implement TagsGetterSetter.
	if t, ok := spec.(TagsGetterSetter[T]); ok {
		if err := reconcileTags(t, existing, resourceExists, parameters); err != nil {
			return zero, errors.Wrap(err, "failed to reconcile tags")
		}
	}

	labels := parameters.GetLabels()
	if labels == nil {
		labels = make(map[string]string)
	}
	annotations := parameters.GetAnnotations()
	if annotations == nil {
		annotations = make(map[string]string)
	}

	// If the pre-pause annotation is present, restore the reconcile policy it
	// recorded and remove the marker, which un-pauses the resource.
	if prevReconcilePolicy, ok := annotations[prePauseReconcilePolicyAnnotation]; ok {
		annotations[asoannotations.ReconcilePolicy] = prevReconcilePolicy
		delete(annotations, prePauseReconcilePolicyAnnotation)
	}
	if !resourceExists {
		// Create the ASO resource with "skip" in case a matching resource
		// already exists in Azure, in which case CAPZ will assume it is managed
		// by the user and ASO should not actively reconcile changes to the ASO
		// resource. In the canonical "entirely managed by CAPZ" case, the next
		// reconciliation will reveal the resource does not already exist in
		// Azure and the ASO resource will be adopted by changing this
		// annotation to "manage".
		annotations[asoannotations.ReconcilePolicy] = string(asoannotations.ReconcilePolicySkip)
	} else {
		adopt = adopt || spec.WasManaged(existing)
	}
	if adopt {
		annotations[asoannotations.ReconcilePolicy] = string(asoannotations.ReconcilePolicyManage)
	}

	// Set the secret name annotation in order to leverage the ASO resource credential scope as defined in
	// https://azure.github.io/azure-service-operator/guide/authentication/credential-scope/#resource-scope.
	annotations[asoannotations.PerResourceSecret] = aso.GetASOSecretName(r.clusterName)

	// Empty maps are written back as nil.
	// NOTE(review): presumably this avoids a spurious nil-vs-empty difference
	// in the cmp.Diff below — confirm.
	if len(labels) == 0 {
		labels = nil
	}
	parameters.SetLabels(labels)
	if len(annotations) == 0 {
		annotations = nil
	}
	parameters.SetAnnotations(annotations)

	diff := cmp.Diff(existing, parameters)
	if diff == "" {
		if readyErr != nil {
			// Only return this error when the resource is up to date in order to permit updates from
			// Parameters which may fix the resource's current state.
			return zero, readyErr
		}
		log.V(2).Info("resource up to date")
		return existing, nil
	}
	log.V(2).Info("creating or updating resource", "diff", diff)
	return r.createOrUpdateResource(ctx, existing, parameters, resourceExists, serviceName)
}
   229  
   230  // PatchedParameters returns the Parameters of spec with patches applied.
   231  func PatchedParameters[T genruntime.MetaObject](ctx context.Context, scheme *runtime.Scheme, spec azure.ASOResourceSpecGetter[T], existing T) (T, error) {
   232  	var zero T // to be returned with non-nil errors
   233  	parameters, err := spec.Parameters(ctx, existing)
   234  	if err != nil {
   235  		return zero, err
   236  	}
   237  	return applyPatches(scheme, spec, parameters)
   238  }
   239  
   240  func applyPatches[T genruntime.MetaObject](scheme *runtime.Scheme, spec azure.ASOResourceSpecGetter[T], parameters T) (T, error) {
   241  	p, ok := spec.(Patcher)
   242  	if !ok {
   243  		return parameters, nil
   244  	}
   245  
   246  	var zero T // to be returned with non-nil errors
   247  
   248  	gvk, err := apiutil.GVKForObject(parameters, scheme)
   249  	if err != nil {
   250  		return zero, errors.Wrap(err, "failed to get GroupVersionKind for object")
   251  	}
   252  
   253  	(genruntime.MetaObject)(parameters).(interface{ SetGroupVersionKind(schema.GroupVersionKind) }).SetGroupVersionKind(gvk)
   254  	paramData, err := json.Marshal(parameters)
   255  	if err != nil {
   256  		return zero, errors.Wrap(err, "failed to marshal JSON for patch")
   257  	}
   258  
   259  	for i, extraPatch := range p.ExtraPatches() {
   260  		jsonPatch, err := yaml.ToJSON([]byte(extraPatch))
   261  		if err != nil {
   262  			return zero, errors.Wrapf(err, "failed to convert patch at index %d to JSON", i)
   263  		}
   264  		paramData, err = jsonpatch.MergePatch(paramData, jsonPatch)
   265  		if err != nil {
   266  			return zero, errors.Wrapf(err, "failed to apply patch at index %d", i)
   267  		}
   268  	}
   269  
   270  	decoder := serializer.NewCodecFactory(scheme).UniversalDeserializer()
   271  	obj, _, err := decoder.Decode(paramData, nil, nil)
   272  	if err != nil {
   273  		return zero, errors.Wrap(err, "failed to decode object")
   274  	}
   275  
   276  	t, ok := obj.(T)
   277  	if !ok {
   278  		return zero, fmt.Errorf("decoded patched object is %T, not %T", obj, parameters)
   279  	}
   280  
   281  	return t, nil
   282  }
   283  
   284  func (r *reconciler[T]) createOrUpdateResource(ctx context.Context, existing T, parameters client.Object, resourceExists bool, serviceName string) (T, error) {
   285  	var zero T
   286  	var err error
   287  	var logMessageVerbPrefix string
   288  	if resourceExists {
   289  		logMessageVerbPrefix = "updat"
   290  		err = r.Client.Patch(ctx, parameters, client.MergeFrom(existing))
   291  	} else {
   292  		logMessageVerbPrefix = "creat"
   293  		err = r.Client.Create(ctx, parameters)
   294  	}
   295  	if err == nil {
   296  		// Resources need to be requeued to wait for the create or update to finish.
   297  		return zero, azure.WithTransientError(azure.NewOperationNotDoneError(&infrav1.Future{
   298  			Type:          createOrUpdateFutureType,
   299  			ResourceGroup: parameters.GetNamespace(),
   300  			Name:          parameters.GetName(),
   301  		}), requeueInterval)
   302  	}
   303  	return zero, errors.Wrapf(err, fmt.Sprintf("failed to %se resource %s/%s (service: %s)", logMessageVerbPrefix, parameters.GetNamespace(), parameters.GetName(), serviceName))
   304  }
   305  
   306  // DeleteResource implements the logic for deleting a resource Asynchronously.
   307  func (r *reconciler[T]) DeleteResource(ctx context.Context, resource T, serviceName string) (err error) {
   308  	ctx, log, done := tele.StartSpanWithLogger(ctx, "services.aso.DeleteResource")
   309  	defer done()
   310  
   311  	resource.SetNamespace(r.owner.GetNamespace())
   312  	resourceName := resource.GetName()
   313  	resourceNamespace := resource.GetNamespace()
   314  
   315  	log = log.WithValues("service", serviceName, "resource", resourceName, "namespace", resourceNamespace)
   316  
   317  	managed, err := IsManaged(ctx, r.Client, resource, r.owner)
   318  	if apierrors.IsNotFound(err) {
   319  		// already deleted
   320  		log.V(2).Info("successfully deleted resource")
   321  		return nil
   322  	}
   323  	if err != nil {
   324  		return errors.Wrap(err, "failed to determine if resource is managed")
   325  	}
   326  	if !managed {
   327  		log.V(4).Info("skipping delete for unmanaged resource")
   328  		return nil
   329  	}
   330  
   331  	log.V(2).Info("deleting resource")
   332  	err = r.Client.Delete(ctx, resource)
   333  	if err != nil {
   334  		if apierrors.IsNotFound(err) {
   335  			// already deleted
   336  			log.V(2).Info("successfully deleted resource")
   337  			return nil
   338  		}
   339  		return errors.Wrapf(err, "failed to delete resource %s/%s (service: %s)", resourceNamespace, resourceName, serviceName)
   340  	}
   341  
   342  	return azure.WithTransientError(azure.NewOperationNotDoneError(&infrav1.Future{
   343  		Type:          deleteFutureType,
   344  		ResourceGroup: resourceNamespace,
   345  		Name:          resourceName,
   346  	}), requeueInterval)
   347  }
   348  
   349  // IsManaged returns whether the ASO resource referred to by spec was created by
   350  // CAPZ and therefore whether CAPZ should manage its lifecycle.
   351  func IsManaged[T genruntime.MetaObject](ctx context.Context, ctrlClient client.Client, resource T, owner client.Object) (bool, error) {
   352  	ctx, _, done := tele.StartSpanWithLogger(ctx, "services.aso.IsManaged")
   353  	defer done()
   354  
   355  	resource.SetNamespace(owner.GetNamespace())
   356  
   357  	err := ctrlClient.Get(ctx, client.ObjectKeyFromObject(resource), resource)
   358  	if err != nil {
   359  		return false, errors.Wrap(err, "error getting resource")
   360  	}
   361  
   362  	return isOwnedBy(resource, owner, ctrlClient.Scheme())
   363  }
   364  
   365  func isOwnedBy(resource client.Object, owner client.Object, scheme *runtime.Scheme) (bool, error) {
   366  	ownerGVK, err := apiutil.GVKForObject(owner, scheme)
   367  	if err != nil {
   368  		return false, err
   369  	}
   370  	existingOwner := metav1.GetControllerOf(resource)
   371  	return existingOwner != nil &&
   372  		existingOwner.APIVersion == ownerGVK.GroupVersion().String() &&
   373  		existingOwner.Kind == ownerGVK.Kind &&
   374  		existingOwner.Name == owner.GetName(), nil
   375  }
   376  
   377  func hasLegacyOwnedByLabel(labels map[string]string, clusterName string) bool {
   378  	//nolint:staticcheck // Referencing this deprecated value is required for backwards compatibility.
   379  	return labels[infrav1.OwnedByClusterLabelKey] == clusterName
   380  }
   381  
   382  // PauseResource pauses an ASO resource by updating its `reconcile-policy` to `skip`.
   383  func (r *reconciler[T]) PauseResource(ctx context.Context, resource T, serviceName string) error {
   384  	ctx, log, done := tele.StartSpanWithLogger(ctx, "services.aso.PauseResource")
   385  	defer done()
   386  
   387  	resource.SetNamespace(r.owner.GetNamespace())
   388  
   389  	log = log.WithValues("service", serviceName, "resource", resource.GetName(), "namespace", resource.GetNamespace())
   390  
   391  	if err := r.Client.Get(ctx, client.ObjectKeyFromObject(resource), resource); err != nil {
   392  		return err
   393  	}
   394  	if isOwned, err := isOwnedBy(resource, r.owner, r.Scheme()); err != nil {
   395  		return err
   396  	} else if !isOwned {
   397  		log.V(4).Info("Skipping pause of unmanaged resource")
   398  		return nil
   399  	}
   400  
   401  	annotations := resource.GetAnnotations()
   402  	if _, exists := annotations[prePauseReconcilePolicyAnnotation]; exists {
   403  		log.V(4).Info("resource is already paused")
   404  		return nil
   405  	}
   406  
   407  	log.V(4).Info("Pausing resource")
   408  	before := resource.DeepCopyObject().(genruntime.MetaObject)
   409  
   410  	if annotations == nil {
   411  		annotations = make(map[string]string, 2)
   412  	}
   413  	annotations[prePauseReconcilePolicyAnnotation] = annotations[asoannotations.ReconcilePolicy]
   414  	annotations[asoannotations.ReconcilePolicy] = string(asoannotations.ReconcilePolicySkip)
   415  	resource.SetAnnotations(annotations)
   416  
   417  	return r.Client.Patch(ctx, resource, client.MergeFrom(before))
   418  }