open-cluster-management.io/governance-policy-propagator@v0.13.0/controllers/propagator/replicatedpolicy_controller.go (about)

     1  package propagator
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"fmt"
     7  	"strconv"
     8  	"strings"
     9  	"sync"
    10  	"time"
    11  
    12  	templates "github.com/stolostron/go-template-utils/v4/pkg/templates"
    13  	k8sdepwatches "github.com/stolostron/kubernetes-dependency-watches/client"
    14  	k8serrors "k8s.io/apimachinery/pkg/api/errors"
    15  	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
    16  	"k8s.io/apimachinery/pkg/types"
    17  	clusterv1beta1 "open-cluster-management.io/api/cluster/v1beta1"
    18  	appsv1 "open-cluster-management.io/multicloud-operators-subscription/pkg/apis/apps/placementrule/v1"
    19  	ctrl "sigs.k8s.io/controller-runtime"
    20  	"sigs.k8s.io/controller-runtime/pkg/client"
    21  	"sigs.k8s.io/controller-runtime/pkg/reconcile"
    22  
    23  	policiesv1 "open-cluster-management.io/governance-policy-propagator/api/v1"
    24  	"open-cluster-management.io/governance-policy-propagator/controllers/common"
    25  	"open-cluster-management.io/governance-policy-propagator/controllers/complianceeventsapi"
    26  )
    27  
const (
	// ParentPolicyIDAnnotation is the annotation key that setDBAnnotations writes on the replicated policy. Its
	// value is the base-10 compliance history database ID of the parent (root) policy.
	ParentPolicyIDAnnotation = "policy.open-cluster-management.io/parent-policy-compliance-db-id"
	// PolicyIDAnnotation is the annotation key that setDBAnnotations writes on each policy template of the
	// replicated policy. Its value is the base-10 compliance history database ID of that policy template.
	PolicyIDAnnotation       = "policy.open-cluster-management.io/policy-compliance-db-id"
)
    32  
// Compile-time assertion that *ReplicatedPolicyReconciler implements reconcile.Reconciler.
var _ reconcile.Reconciler = &ReplicatedPolicyReconciler{}

// ReplicatedPolicyReconciler reconciles a single replicated policy (one root policy copied into one cluster
// namespace) per request.
type ReplicatedPolicyReconciler struct {
	// Propagator is embedded; the methods in this file use its Get/List/Create/Update/Delete calls, its Client
	// field, and its Recorder field.
	Propagator
	// ResourceVersions is accessed through safeWriteLoad with "<namespace>/<name>" keys of replicated policies.
	// Each entry records the last resourceVersion written by this reconciler, or "deleted".
	ResourceVersions    *sync.Map
	// DynamicWatcher holds the watches on the policy set dependencies of each replicated policy.
	DynamicWatcher      k8sdepwatches.DynamicWatcher
	// TemplateResolver is used for its hub template watch count and to uncache the watches of a replicated policy.
	TemplateResolver    *templates.TemplateResolver
	// ComplianceServerCtx gives access to the compliance history database handle (DB), the ID caches
	// (ParentPolicyToID, PolicyToID), the lock guarding them (Lock), and the retry queue (Queue).
	ComplianceServerCtx *complianceeventsapi.ComplianceServerCtx
}
    42  
    43  func (r *ReplicatedPolicyReconciler) Reconcile(ctx context.Context, request ctrl.Request) (ctrl.Result, error) {
    44  	log := log.WithValues("Request.Namespace", request.Namespace, "Request.Name", request.Name)
    45  	log.Info("Reconciling the replicated policy")
    46  
    47  	// Set the hub template watch metric after reconcile
    48  	defer func() {
    49  		hubTempWatches := r.TemplateResolver.GetWatchCount()
    50  		log.V(3).Info("Setting hub template watch metric", "value", hubTempWatches)
    51  
    52  		hubTemplateActiveWatchesMetric.Set(float64(hubTempWatches))
    53  	}()
    54  
    55  	replicatedExists := true
    56  	replicatedPolicy := &policiesv1.Policy{}
    57  
    58  	if err := r.Get(ctx, request.NamespacedName, replicatedPolicy); err != nil {
    59  		if !k8serrors.IsNotFound(err) {
    60  			log.Error(err, "Failed to get the replicated policy")
    61  
    62  			return reconcile.Result{}, err
    63  		}
    64  
    65  		replicatedExists = false
    66  	}
    67  
    68  	rootName, rootNS, err := common.ParseRootPolicyLabel(request.Name)
    69  	if err != nil {
    70  		if !replicatedExists {
    71  			log.Error(err, "Invalid replicated policy sent for reconcile, rejecting")
    72  
    73  			return reconcile.Result{}, nil
    74  		}
    75  
    76  		cleanUpErr := r.cleanUpReplicated(ctx, replicatedPolicy)
    77  		if cleanUpErr != nil && !k8serrors.IsNotFound(cleanUpErr) {
    78  			log.Error(err, "Failed to delete the invalid replicated policy, requeueing")
    79  
    80  			return reconcile.Result{}, err
    81  		}
    82  
    83  		log.Info("Invalid replicated policy deleted")
    84  
    85  		return reconcile.Result{}, nil
    86  	}
    87  
    88  	rsrcVersKey := request.Namespace + "/" + request.Name
    89  
    90  	// Fetch the Root Policy instance
    91  	rootPolicy := &policiesv1.Policy{}
    92  	rootNN := types.NamespacedName{Namespace: rootNS, Name: rootName}
    93  
    94  	if err := r.Get(ctx, rootNN, rootPolicy); err != nil {
    95  		if !k8serrors.IsNotFound(err) {
    96  			log.Error(err, "Failed to get the root policy, requeueing")
    97  
    98  			return reconcile.Result{}, err
    99  		}
   100  
   101  		if !replicatedExists {
   102  			version := safeWriteLoad(r.ResourceVersions, rsrcVersKey)
   103  			defer version.Unlock()
   104  
   105  			// Store this to ensure the cache matches a known possible state for this situation
   106  			version.resourceVersion = "deleted"
   107  
   108  			log.V(1).Info("Root policy and replicated policy already missing")
   109  
   110  			return reconcile.Result{}, nil
   111  		}
   112  
   113  		inClusterNS, err := common.IsInClusterNamespace(ctx, r.Client, request.Namespace)
   114  		if err != nil {
   115  			return reconcile.Result{}, err
   116  		}
   117  
   118  		if !inClusterNS {
   119  			// "Hub of hubs" scenario: this cluster is managed by another cluster,
   120  			// which has the root policy for the policy being reconciled.
   121  			log.V(1).Info("Found a replicated policy in non-cluster namespace, skipping it")
   122  
   123  			return reconcile.Result{}, nil
   124  		}
   125  
   126  		// otherwise, we need to clean it up
   127  		if err := r.cleanUpReplicated(ctx, replicatedPolicy); err != nil {
   128  			if !k8serrors.IsNotFound(err) {
   129  				log.Error(err, "Failed to delete the orphaned replicated policy, requeueing")
   130  
   131  				return reconcile.Result{}, err
   132  			}
   133  		}
   134  
   135  		log.Info("Orphaned replicated policy deleted")
   136  
   137  		return reconcile.Result{}, nil
   138  	}
   139  
   140  	if rootPolicy.Spec.Disabled {
   141  		if replicatedExists {
   142  			if err := r.cleanUpReplicated(ctx, replicatedPolicy); err != nil {
   143  				if !k8serrors.IsNotFound(err) {
   144  					log.Error(err, "Failed to delete the disabled replicated policy, requeueing")
   145  
   146  					return reconcile.Result{}, err
   147  				}
   148  			}
   149  
   150  			log.Info("Disabled replicated policy deleted")
   151  
   152  			return reconcile.Result{}, nil
   153  		}
   154  
   155  		version := safeWriteLoad(r.ResourceVersions, rsrcVersKey)
   156  		defer version.Unlock()
   157  
   158  		// Store this to ensure the cache matches a known possible state for this situation
   159  		version.resourceVersion = "deleted"
   160  
   161  		log.V(1).Info("Root policy is disabled, and replicated policy correctly not found.")
   162  
   163  		return reconcile.Result{}, nil
   164  	}
   165  
   166  	// calculate the decision for this specific cluster
   167  	decision, err := r.singleClusterDecision(ctx, rootPolicy, request.Namespace)
   168  	if err != nil {
   169  		log.Error(err, "Failed to determine if policy should be replicated, requeueing")
   170  
   171  		return reconcile.Result{}, err
   172  	}
   173  
   174  	// an empty decision means the policy should not be replicated
   175  	if decision.Cluster == "" {
   176  		if replicatedExists {
   177  			inClusterNS, err := common.IsInClusterNamespace(ctx, r.Client, request.Namespace)
   178  			if err != nil {
   179  				return reconcile.Result{}, err
   180  			}
   181  
   182  			if !inClusterNS {
   183  				// "Hosted mode" scenario: this cluster is hosting another cluster, which is syncing
   184  				// this policy to a cluster namespace that this propagator doesn't know about.
   185  				log.V(1).Info("Found a possible replicated policy for a hosted cluster, skipping it")
   186  
   187  				return reconcile.Result{}, nil
   188  			}
   189  
   190  			if err := r.cleanUpReplicated(ctx, replicatedPolicy); err != nil {
   191  				if !k8serrors.IsNotFound(err) {
   192  					log.Error(err, "Failed to remove the replicated policy for this managed cluster, requeueing")
   193  
   194  					return reconcile.Result{}, err
   195  				}
   196  			}
   197  
   198  			log.Info("Removed replicated policy from managed cluster")
   199  
   200  			return reconcile.Result{}, nil
   201  		}
   202  
   203  		version := safeWriteLoad(r.ResourceVersions, rsrcVersKey)
   204  		defer version.Unlock()
   205  
   206  		// Store this to ensure the cache matches a known possible state for this situation
   207  		version.resourceVersion = "deleted"
   208  
   209  		log.V(1).Info("Replicated policy should not exist on this managed cluster, and does not.")
   210  
   211  		return reconcile.Result{}, nil
   212  	}
   213  
   214  	objsToWatch := getPolicySetDependencies(rootPolicy)
   215  
   216  	desiredReplicatedPolicy, err := r.buildReplicatedPolicy(ctx, rootPolicy, decision)
   217  	if err != nil {
   218  		log.Error(err, "Unable to build desired replicated policy, requeueing")
   219  
   220  		return reconcile.Result{}, err
   221  	}
   222  
   223  	instanceGVK := desiredReplicatedPolicy.GroupVersionKind()
   224  	instanceObjID := k8sdepwatches.ObjectIdentifier{
   225  		Group:     instanceGVK.Group,
   226  		Version:   instanceGVK.Version,
   227  		Kind:      instanceGVK.Kind,
   228  		Namespace: request.Namespace,
   229  		Name:      request.Name,
   230  	}
   231  
   232  	// save the watcherError for later, so that the policy can still be updated now.
   233  	var watcherErr error
   234  
   235  	if policyHasTemplates(rootPolicy) {
   236  		if replicatedExists {
   237  			// If the replicated policy has an initialization vector specified, set it for processing
   238  			if initializationVector, ok := replicatedPolicy.Annotations[IVAnnotation]; ok {
   239  				tempAnnotations := desiredReplicatedPolicy.GetAnnotations()
   240  				if tempAnnotations == nil {
   241  					tempAnnotations = make(map[string]string)
   242  				}
   243  
   244  				tempAnnotations[IVAnnotation] = initializationVector
   245  
   246  				desiredReplicatedPolicy.SetAnnotations(tempAnnotations)
   247  			}
   248  		}
   249  
   250  		// Any errors to expose to the user are logged and recorded in the processTemplates method. Only retry
   251  		// the request if it's determined to be a retryable error (i.e. don't retry syntax errors).
   252  		err := r.processTemplates(ctx, desiredReplicatedPolicy, decision.Cluster, rootPolicy)
   253  		if errors.Is(err, ErrRetryable) {
   254  			// Return the error if it's retryable, which will utilize controller-runtime's exponential backoff.
   255  			return reconcile.Result{}, err
   256  		}
   257  	} else {
   258  		watcherErr := r.TemplateResolver.UncacheWatcher(instanceObjID)
   259  		if watcherErr != nil {
   260  			log.Error(watcherErr, "Failed to uncache objects related to the replicated policy's templates")
   261  		}
   262  	}
   263  
   264  	r.setDBAnnotations(ctx, rootPolicy, desiredReplicatedPolicy, replicatedPolicy)
   265  
   266  	if len(objsToWatch) != 0 {
   267  		refObjs := make([]k8sdepwatches.ObjectIdentifier, 0, len(objsToWatch))
   268  		for objToWatch := range objsToWatch {
   269  			refObjs = append(refObjs, objToWatch)
   270  		}
   271  
   272  		watcherErr = r.DynamicWatcher.AddOrUpdateWatcher(instanceObjID, refObjs...)
   273  		if watcherErr != nil {
   274  			log.Error(watcherErr, "Failed to update the dynamic watches for the policy set dependencies")
   275  		}
   276  	} else {
   277  		watcherErr = r.DynamicWatcher.RemoveWatcher(instanceObjID)
   278  		if watcherErr != nil {
   279  			log.Error(watcherErr, "Failed to remove the dynamic watches for the hub policy templates")
   280  		}
   281  	}
   282  
   283  	if !replicatedExists {
   284  		version := safeWriteLoad(r.ResourceVersions, rsrcVersKey)
   285  		defer version.Unlock()
   286  
   287  		err = r.Create(ctx, desiredReplicatedPolicy)
   288  		if err != nil {
   289  			log.Error(err, "Failed to create the replicated policy, requeueing")
   290  
   291  			return reconcile.Result{}, err
   292  		}
   293  
   294  		r.Recorder.Event(rootPolicy, "Normal", "PolicyPropagation",
   295  			fmt.Sprintf("Policy %s/%s was propagated to cluster %s", rootPolicy.GetNamespace(),
   296  				rootPolicy.GetName(), decision.Cluster))
   297  
   298  		version.resourceVersion = desiredReplicatedPolicy.GetResourceVersion()
   299  
   300  		log.Info("Created replicated policy")
   301  
   302  		return reconcile.Result{}, watcherErr
   303  	}
   304  
   305  	version := safeWriteLoad(r.ResourceVersions, rsrcVersKey)
   306  	defer version.Unlock()
   307  
   308  	// replicated policy already created, need to compare and possibly update
   309  	if !equivalentReplicatedPolicies(desiredReplicatedPolicy, replicatedPolicy) {
   310  		replicatedPolicy.SetAnnotations(desiredReplicatedPolicy.GetAnnotations())
   311  		replicatedPolicy.SetLabels(desiredReplicatedPolicy.GetLabels())
   312  		replicatedPolicy.Spec = desiredReplicatedPolicy.Spec
   313  
   314  		err = r.Update(ctx, replicatedPolicy)
   315  		if err != nil {
   316  			log.Error(err, "Failed to update the replicated policy, requeueing")
   317  
   318  			return reconcile.Result{}, err
   319  		}
   320  
   321  		r.Recorder.Event(rootPolicy, "Normal", "PolicyPropagation",
   322  			fmt.Sprintf("Policy %s/%s was updated for cluster %s", rootPolicy.GetNamespace(),
   323  				rootPolicy.GetName(), decision.Cluster))
   324  
   325  		log.Info("Replicated policy updated")
   326  	} else {
   327  		log.Info("Replicated policy matches, no update needed")
   328  	}
   329  
   330  	// whether it was updated or not, this resourceVersion can be cached
   331  	version.resourceVersion = replicatedPolicy.GetResourceVersion()
   332  
   333  	if watcherErr != nil {
   334  		log.Info("Requeueing for the dynamic watcher error")
   335  	}
   336  
   337  	return reconcile.Result{}, watcherErr
   338  }
   339  
   340  // getParentPolicyID needs to have the caller call r.ComplianceServerCtx.Lock.RLock.
   341  func (r *ReplicatedPolicyReconciler) getParentPolicyID(
   342  	ctx context.Context,
   343  	rootPolicy *policiesv1.Policy,
   344  	existingReplicatedPolicy *policiesv1.Policy,
   345  ) (int32, error) {
   346  	dbParentPolicy := complianceeventsapi.ParentPolicyFromPolicyObj(rootPolicy)
   347  
   348  	// Check the cache first.
   349  	cachedParentPolicyID, ok := r.ComplianceServerCtx.ParentPolicyToID.Load(dbParentPolicy.Key())
   350  	if ok {
   351  		return cachedParentPolicyID.(int32), nil
   352  	}
   353  
   354  	// Try the database second before checking the replicated policy to be able to recover if the compliance history
   355  	// database is restored from backup and the IDs on the replicated policy no longer exist.
   356  	var dbErr error
   357  
   358  	if r.ComplianceServerCtx.DB != nil {
   359  		err := dbParentPolicy.GetOrCreate(ctx, r.ComplianceServerCtx.DB)
   360  		if err == nil {
   361  			r.ComplianceServerCtx.ParentPolicyToID.Store(dbParentPolicy.Key(), dbParentPolicy.KeyID)
   362  
   363  			return dbParentPolicy.KeyID, nil
   364  		}
   365  
   366  		if r.ComplianceServerCtx.DB.PingContext(ctx) != nil {
   367  			dbErr = complianceeventsapi.ErrDBConnectionFailed
   368  		} else {
   369  			dbErr = fmt.Errorf("%w: failed to get the database ID of the parent policy", err)
   370  		}
   371  	} else {
   372  		dbErr = complianceeventsapi.ErrDBConnectionFailed
   373  	}
   374  
   375  	// Check if the existing replicated policy already has the annotation set and has the same
   376  	// categories, controls, and standards as the current root policy.
   377  	var parentPolicyIDFromRepl string
   378  	if existingReplicatedPolicy != nil {
   379  		parentPolicyIDFromRepl = existingReplicatedPolicy.Annotations[ParentPolicyIDAnnotation]
   380  	}
   381  
   382  	if parentPolicyIDFromRepl != "" {
   383  		dbParentPolicyFromRepl := complianceeventsapi.ParentPolicyFromPolicyObj(existingReplicatedPolicy)
   384  		dbParentPolicyFromRepl.Name = rootPolicy.Name
   385  		dbParentPolicyFromRepl.Namespace = rootPolicy.Namespace
   386  
   387  		if dbParentPolicy.Key() == dbParentPolicyFromRepl.Key() {
   388  			parentPolicyID, err := strconv.ParseInt(parentPolicyIDFromRepl, 10, 32)
   389  			if err == nil && parentPolicyID != 0 {
   390  				r.ComplianceServerCtx.ParentPolicyToID.Store(dbParentPolicy.Key(), int32(parentPolicyID))
   391  
   392  				return int32(parentPolicyID), nil
   393  			}
   394  		}
   395  	}
   396  
   397  	return 0, dbErr
   398  }
   399  
   400  // getPolicyID needs to have the caller call r.ComplianceServerCtx.Lock.RLock.
   401  func (r *ReplicatedPolicyReconciler) getPolicyID(
   402  	ctx context.Context,
   403  	replPolicy *policiesv1.Policy,
   404  	existingReplPolicy *policiesv1.Policy,
   405  	replTemplateIdx int,
   406  	skipDB bool,
   407  ) (int32, *unstructured.Unstructured, error) {
   408  	// Start by checking the cache.
   409  	plcTemplate := replPolicy.Spec.PolicyTemplates[replTemplateIdx]
   410  	plcTmplUnstruct := &unstructured.Unstructured{}
   411  
   412  	err := plcTmplUnstruct.UnmarshalJSON(plcTemplate.ObjectDefinition.Raw)
   413  	if err != nil {
   414  		return 0, plcTmplUnstruct, err
   415  	}
   416  
   417  	dbPolicy := complianceeventsapi.PolicyFromUnstructured(*plcTmplUnstruct)
   418  	if err := dbPolicy.Validate(); err != nil {
   419  		return 0, plcTmplUnstruct, err
   420  	}
   421  
   422  	var policyID int32
   423  
   424  	cachedPolicyID, ok := r.ComplianceServerCtx.PolicyToID.Load(dbPolicy.Key())
   425  	if ok {
   426  		policyID = cachedPolicyID.(int32)
   427  
   428  		return policyID, plcTmplUnstruct, nil
   429  	}
   430  
   431  	// Try the database second before checking the replicated policy to be able to recover if the compliance history
   432  	// database is restored from backup and the IDs on the replicated policy no longer exist.
   433  	var dbErr error
   434  	if skipDB || r.ComplianceServerCtx.DB == nil {
   435  		dbErr = complianceeventsapi.ErrDBConnectionFailed
   436  	} else {
   437  		err = dbPolicy.GetOrCreate(ctx, r.ComplianceServerCtx.DB)
   438  		if err == nil {
   439  			r.ComplianceServerCtx.PolicyToID.Store(dbPolicy.Key(), dbPolicy.KeyID)
   440  
   441  			return dbPolicy.KeyID, plcTmplUnstruct, nil
   442  		}
   443  
   444  		dbErr = err
   445  	}
   446  
   447  	// Check if the existing policy template matches the existing one
   448  	var existingPlcTemplate *policiesv1.PolicyTemplate
   449  
   450  	existingPlcTmplUnstruct := unstructured.Unstructured{}
   451  
   452  	var existingAnnotation string
   453  	var existingDBPolicy *complianceeventsapi.Policy
   454  
   455  	// Try the existing policy template first before trying the database.
   456  	if existingReplPolicy != nil && len(existingReplPolicy.Spec.PolicyTemplates) >= replTemplateIdx+1 {
   457  		existingPlcTemplate = existingReplPolicy.Spec.PolicyTemplates[replTemplateIdx]
   458  
   459  		err = existingPlcTmplUnstruct.UnmarshalJSON(existingPlcTemplate.ObjectDefinition.Raw)
   460  		if err == nil {
   461  			existingAnnotations := existingPlcTmplUnstruct.GetAnnotations()
   462  			existingAnnotation = existingAnnotations[PolicyIDAnnotation]
   463  
   464  			if existingAnnotation != "" {
   465  				existingDBPolicy = complianceeventsapi.PolicyFromUnstructured(existingPlcTmplUnstruct)
   466  			}
   467  		}
   468  	}
   469  
   470  	// This is a continuation from the above if statement but this was broken up here to make it less indented.
   471  	if existingAnnotation != "" {
   472  		if err := existingDBPolicy.Validate(); err == nil {
   473  			if dbPolicy.Key() == existingDBPolicy.Key() {
   474  				policyID, err := strconv.ParseInt(existingAnnotation, 10, 32)
   475  				if err == nil && policyID != 0 {
   476  					r.ComplianceServerCtx.PolicyToID.Store(dbPolicy.Key(), int32(policyID))
   477  
   478  					return int32(policyID), plcTmplUnstruct, nil
   479  				}
   480  			}
   481  		}
   482  	}
   483  
   484  	return 0, plcTmplUnstruct, dbErr
   485  }
   486  
   487  // setDBAnnotations sets the parent policy ID on the replicated policy and the policy ID for each policy template.
   488  // If the DB connection is unavailable, it queues up a reconcile for when the DB becomes available.
   489  func (r *ReplicatedPolicyReconciler) setDBAnnotations(
   490  	ctx context.Context,
   491  	rootPolicy *policiesv1.Policy,
   492  	replicatedPolicy *policiesv1.Policy,
   493  	existingReplicatedPolicy *policiesv1.Policy,
   494  ) {
   495  	r.ComplianceServerCtx.Lock.RLock()
   496  	defer r.ComplianceServerCtx.Lock.RUnlock()
   497  
   498  	// Assume the database is connected unless told otherwise.
   499  	dbAvailable := true
   500  	var requeueForDB bool
   501  
   502  	annotations := replicatedPolicy.GetAnnotations()
   503  	if annotations == nil {
   504  		annotations = map[string]string{}
   505  	}
   506  
   507  	parentPolicyID, err := r.getParentPolicyID(ctx, rootPolicy, existingReplicatedPolicy)
   508  	if err != nil {
   509  		if errors.Is(err, complianceeventsapi.ErrDBConnectionFailed) {
   510  			dbAvailable = false
   511  		} else {
   512  			log.Error(
   513  				err, "Failed to get the parent policy ID", "name", rootPolicy.Name, "namespace", rootPolicy.Namespace,
   514  			)
   515  		}
   516  
   517  		requeueForDB = true
   518  
   519  		// Remove it if the user accidentally provided the annotation
   520  		if annotations[ParentPolicyIDAnnotation] != "" {
   521  			delete(annotations, PolicyIDAnnotation)
   522  			replicatedPolicy.SetAnnotations(annotations)
   523  		}
   524  	} else {
   525  		annotations[ParentPolicyIDAnnotation] = strconv.FormatInt(int64(parentPolicyID), 10)
   526  		replicatedPolicy.SetAnnotations(annotations)
   527  	}
   528  
   529  	for i, plcTemplate := range replicatedPolicy.Spec.PolicyTemplates {
   530  		if plcTemplate == nil {
   531  			continue
   532  		}
   533  
   534  		policyID, plcTmplUnstruct, err := r.getPolicyID(
   535  			ctx, replicatedPolicy, existingReplicatedPolicy, i, !dbAvailable,
   536  		)
   537  		if err != nil {
   538  			if errors.Is(err, complianceeventsapi.ErrDBConnectionFailed) {
   539  				dbAvailable = false
   540  			} else {
   541  				log.Error(
   542  					err,
   543  					"Failed to get the policy ID for the policy template",
   544  					"name", plcTmplUnstruct.GetName(),
   545  					"namespace", plcTmplUnstruct.GetNamespace(),
   546  					"index", i,
   547  				)
   548  			}
   549  
   550  			requeueForDB = true
   551  			tmplAnnotations := plcTmplUnstruct.GetAnnotations()
   552  
   553  			if tmplAnnotations[PolicyIDAnnotation] == "" {
   554  				continue
   555  			}
   556  
   557  			// Remove it if the user accidentally provided the annotation
   558  			delete(tmplAnnotations, PolicyIDAnnotation)
   559  			plcTmplUnstruct.SetAnnotations(tmplAnnotations)
   560  		} else {
   561  			tmplAnnotations := plcTmplUnstruct.GetAnnotations()
   562  			if tmplAnnotations == nil {
   563  				tmplAnnotations = map[string]string{}
   564  			}
   565  
   566  			tmplAnnotations[PolicyIDAnnotation] = strconv.FormatInt(int64(policyID), 10)
   567  			plcTmplUnstruct.SetAnnotations(tmplAnnotations)
   568  		}
   569  
   570  		updatedTemplate, err := plcTmplUnstruct.MarshalJSON()
   571  		if err != nil {
   572  			log.Error(
   573  				err, "Failed to set the annotation on the policy template", "index", i, "anotation", PolicyIDAnnotation,
   574  			)
   575  
   576  			continue
   577  		}
   578  
   579  		replicatedPolicy.Spec.PolicyTemplates[i].ObjectDefinition.Raw = updatedTemplate
   580  	}
   581  
   582  	if requeueForDB {
   583  		log.V(2).Info(
   584  			"The compliance events database is not available. Queuing this replicated policy to be reprocessed if the "+
   585  				"database becomes available.",
   586  			"namespace", replicatedPolicy.Namespace,
   587  			"name", replicatedPolicy.Name,
   588  		)
   589  		r.ComplianceServerCtx.Queue.Add(
   590  			types.NamespacedName{Namespace: replicatedPolicy.Namespace, Name: replicatedPolicy.Name},
   591  		)
   592  	}
   593  }
   594  
   595  func (r *ReplicatedPolicyReconciler) cleanUpReplicated(ctx context.Context, replicatedPolicy *policiesv1.Policy) error {
   596  	gvk := replicatedPolicy.GroupVersionKind()
   597  
   598  	objID := k8sdepwatches.ObjectIdentifier{
   599  		Group:     gvk.Group,
   600  		Version:   gvk.Version,
   601  		Kind:      gvk.Kind,
   602  		Namespace: replicatedPolicy.Namespace,
   603  		Name:      replicatedPolicy.Name,
   604  	}
   605  
   606  	watcherErr := r.DynamicWatcher.RemoveWatcher(objID)
   607  
   608  	uncacheErr := r.TemplateResolver.UncacheWatcher(objID)
   609  	if uncacheErr != nil {
   610  		if watcherErr == nil {
   611  			watcherErr = uncacheErr
   612  		} else {
   613  			watcherErr = fmt.Errorf("%w; %w", watcherErr, uncacheErr)
   614  		}
   615  	}
   616  
   617  	rsrcVersKey := replicatedPolicy.GetNamespace() + "/" + replicatedPolicy.GetName()
   618  
   619  	version := safeWriteLoad(r.ResourceVersions, rsrcVersKey)
   620  	defer version.Unlock()
   621  
   622  	deleteErr := r.Delete(ctx, replicatedPolicy)
   623  
   624  	if deleteErr != nil {
   625  		if k8serrors.IsNotFound(deleteErr) {
   626  			version.resourceVersion = "deleted"
   627  		}
   628  	} else {
   629  		version.resourceVersion = "deleted"
   630  
   631  		// Normally the spec-sync controller handles this, however, if it's a self-managed hub policy, the spec-sync
   632  		// controller does not run. So this is a special case.
   633  		if replicatedPolicy.Namespace == "local-cluster" && r.ComplianceServerCtx.DB != nil {
   634  			r.recordDisabledEvents(ctx, replicatedPolicy)
   635  		}
   636  	}
   637  
   638  	return errors.Join(watcherErr, deleteErr)
   639  }
   640  
   641  // recordDisabledEvents will generate and record disabled compliance events in the compliance history database for each
   642  // policy template. On retryable errors, the generated compliance event is added to the ComplianceServerCtx queue
   643  // for MonitorDatabaseConnection to handle once the database is back up.
   644  func (r *ReplicatedPolicyReconciler) recordDisabledEvents(
   645  	ctx context.Context, replicatedPolicy *policiesv1.Policy,
   646  ) {
   647  	log := log.WithValues("policy", replicatedPolicy.Name)
   648  
   649  	if replicatedPolicy.Annotations[ParentPolicyIDAnnotation] == "" {
   650  		return
   651  	}
   652  
   653  	parentPolicyID, err := strconv.ParseInt(replicatedPolicy.Annotations[ParentPolicyIDAnnotation], 10, 32)
   654  	if err != nil {
   655  		log.Error(err, "Failed to record a disabled compliance event due to an invalid parent policy ID")
   656  
   657  		return
   658  	}
   659  
   660  	dbConnectionDown := false
   661  
   662  	for _, template := range replicatedPolicy.Spec.PolicyTemplates {
   663  		plcTmplUnstruct := &unstructured.Unstructured{}
   664  
   665  		err := plcTmplUnstruct.UnmarshalJSON(template.ObjectDefinition.Raw)
   666  		if err != nil {
   667  			continue
   668  		}
   669  
   670  		policyIDStr := plcTmplUnstruct.GetAnnotations()[PolicyIDAnnotation]
   671  		if policyIDStr == "" {
   672  			continue
   673  		}
   674  
   675  		policyID, err := strconv.ParseInt(policyIDStr, 10, 32)
   676  		if err != nil {
   677  			log.Error(err, "Failed to record a disabled compliance event due to an invalid policy ID")
   678  
   679  			continue
   680  		}
   681  
   682  		complianceEvent := &complianceeventsapi.EventDetailsQueued{
   683  			ParentPolicyID: int32(parentPolicyID),
   684  			PolicyID:       int32(policyID),
   685  			Compliance:     "Disabled",
   686  			Message:        "The policy was removed because the parent policy no longer applies to this cluster",
   687  			Timestamp:      time.Now().UTC(),
   688  			ReportedBy:     "governance-policy-framework",
   689  		}
   690  
   691  		if dbConnectionDown {
   692  			log.Info(
   693  				"Failed to record the compliance event. Will requeue.",
   694  				"error", complianceeventsapi.ErrDBConnectionFailed.Error(),
   695  				"eventMessage", complianceEvent.Message,
   696  				"policyID", complianceEvent.PolicyID,
   697  			)
   698  
   699  			r.ComplianceServerCtx.Queue.Add(complianceEvent)
   700  
   701  			continue
   702  		}
   703  
   704  		err = complianceeventsapi.RecordLocalClusterComplianceEvent(
   705  			ctx, r.ComplianceServerCtx, complianceEvent.EventDetails(),
   706  		)
   707  
   708  		requeue := errors.Is(err, complianceeventsapi.ErrRetryable)
   709  		if requeue {
   710  			r.ComplianceServerCtx.Queue.Add(complianceEvent)
   711  		}
   712  
   713  		if errors.Is(err, complianceeventsapi.ErrDBConnectionFailed) {
   714  			dbConnectionDown = true
   715  		}
   716  
   717  		if err != nil {
   718  			log.Info(
   719  				"Failed to record the compliance event",
   720  				"requeue", requeue,
   721  				"error", err.Error(),
   722  				"eventMessage", complianceEvent.Message,
   723  				"policyID", complianceEvent.PolicyID,
   724  			)
   725  		} else {
   726  			log.V(2).Info(
   727  				"Recorded the compliance event",
   728  				"eventMessage", complianceEvent.Message,
   729  				"policyID", complianceEvent.PolicyID,
   730  			)
   731  		}
   732  	}
   733  }
   734  
   735  func (r *ReplicatedPolicyReconciler) singleClusterDecision(
   736  	ctx context.Context, rootPlc *policiesv1.Policy, clusterName string,
   737  ) (decision clusterDecision, err error) {
   738  	positiveDecision := clusterDecision{
   739  		Cluster: clusterName,
   740  	}
   741  
   742  	pbList := &policiesv1.PlacementBindingList{}
   743  
   744  	err = r.List(ctx, pbList, &client.ListOptions{Namespace: rootPlc.GetNamespace()})
   745  	if err != nil {
   746  		return clusterDecision{}, err
   747  	}
   748  
   749  	foundWithoutSubFilter := false
   750  
   751  	// Process all placement bindings without subFilter
   752  	for i, pb := range pbList.Items {
   753  		if pb.SubFilter == policiesv1.Restricted {
   754  			continue
   755  		}
   756  
   757  		found, err := r.isSingleClusterInDecisions(ctx, &pbList.Items[i], rootPlc.GetName(), clusterName)
   758  		if err != nil {
   759  			return clusterDecision{}, err
   760  		}
   761  
   762  		if !found {
   763  			continue
   764  		}
   765  
   766  		if strings.EqualFold(pb.BindingOverrides.RemediationAction, string(policiesv1.Enforce)) {
   767  			positiveDecision.PolicyOverrides = pb.BindingOverrides
   768  			// If an override is found, then no other decisions can currently change this result.
   769  			// NOTE: if additional overrides are added in the future, this will additional logic.
   770  			return positiveDecision, nil
   771  		}
   772  
   773  		foundWithoutSubFilter = true
   774  	}
   775  
   776  	if !foundWithoutSubFilter {
   777  		// No need to look through the subFilter bindings.
   778  		return clusterDecision{}, nil
   779  	}
   780  
   781  	// Process all placement bindings with subFilter
   782  	for i, pb := range pbList.Items {
   783  		if pb.SubFilter != policiesv1.Restricted {
   784  			continue
   785  		}
   786  
   787  		found, err := r.isSingleClusterInDecisions(ctx, &pbList.Items[i], rootPlc.GetName(), clusterName)
   788  		if err != nil {
   789  			return clusterDecision{}, err
   790  		}
   791  
   792  		if !found {
   793  			continue
   794  		}
   795  
   796  		if strings.EqualFold(pb.BindingOverrides.RemediationAction, string(policiesv1.Enforce)) {
   797  			positiveDecision.PolicyOverrides = pb.BindingOverrides
   798  			// If an override is found, then no other decisions can currently change this result.
   799  			// NOTE: if additional overrides are added in the future, this will additional logic.
   800  			return positiveDecision, nil
   801  		}
   802  	}
   803  
   804  	// None of the bindings had any overrides.
   805  	return positiveDecision, nil
   806  }
   807  
   808  func (r *ReplicatedPolicyReconciler) isSingleClusterInDecisions(
   809  	ctx context.Context, pb *policiesv1.PlacementBinding, policyName, clusterName string,
   810  ) (found bool, err error) {
   811  	if !common.HasValidPlacementRef(pb) {
   812  		return false, nil
   813  	}
   814  
   815  	subjectFound := false
   816  
   817  	for _, subject := range pb.Subjects {
   818  		if subject.APIGroup != policiesv1.SchemeGroupVersion.Group {
   819  			continue
   820  		}
   821  
   822  		switch subject.Kind {
   823  		case policiesv1.Kind:
   824  			if subject.Name == policyName {
   825  				subjectFound = true
   826  			}
   827  		case policiesv1.PolicySetKind:
   828  			if common.IsPolicyInPolicySet(ctx, r.Client, policyName, subject.Name, pb.GetNamespace()) {
   829  				subjectFound = true
   830  			}
   831  		}
   832  
   833  		if subjectFound {
   834  			break
   835  		}
   836  	}
   837  
   838  	if !subjectFound {
   839  		return false, nil
   840  	}
   841  
   842  	refNN := types.NamespacedName{
   843  		Namespace: pb.GetNamespace(),
   844  		Name:      pb.PlacementRef.Name,
   845  	}
   846  
   847  	switch pb.PlacementRef.Kind {
   848  	case "PlacementRule":
   849  		plr := &appsv1.PlacementRule{}
   850  		if err := r.Get(ctx, refNN, plr); err != nil && !k8serrors.IsNotFound(err) {
   851  			return false, fmt.Errorf("failed to get PlacementRule '%v': %w", pb.PlacementRef.Name, err)
   852  		}
   853  
   854  		for _, decision := range plr.Status.Decisions {
   855  			if decision.ClusterName == clusterName {
   856  				return true, nil
   857  			}
   858  		}
   859  	case "Placement":
   860  		pl := &clusterv1beta1.Placement{}
   861  		if err := r.Get(ctx, refNN, pl); err != nil && !k8serrors.IsNotFound(err) {
   862  			return false, fmt.Errorf("failed to get Placement '%v': %w", pb.PlacementRef.Name, err)
   863  		}
   864  
   865  		if k8serrors.IsNotFound(err) {
   866  			return false, nil
   867  		}
   868  
   869  		list := &clusterv1beta1.PlacementDecisionList{}
   870  		lopts := &client.ListOptions{Namespace: pb.GetNamespace()}
   871  
   872  		opts := client.MatchingLabels{"cluster.open-cluster-management.io/placement": pl.GetName()}
   873  		opts.ApplyToList(lopts)
   874  
   875  		err = r.List(ctx, list, lopts)
   876  		if err != nil && !k8serrors.IsNotFound(err) {
   877  			return false, fmt.Errorf("failed to list the PlacementDecisions for '%v', %w", pb.PlacementRef.Name, err)
   878  		}
   879  
   880  		for _, item := range list.Items {
   881  			for _, cluster := range item.Status.Decisions {
   882  				if cluster.ClusterName == clusterName {
   883  					return true, nil
   884  				}
   885  			}
   886  		}
   887  	}
   888  
   889  	return false, nil
   890  }