open-cluster-management.io/governance-policy-propagator@v0.13.0/controllers/propagator/replicatedpolicy_controller.go (about) 1 package propagator 2 3 import ( 4 "context" 5 "errors" 6 "fmt" 7 "strconv" 8 "strings" 9 "sync" 10 "time" 11 12 templates "github.com/stolostron/go-template-utils/v4/pkg/templates" 13 k8sdepwatches "github.com/stolostron/kubernetes-dependency-watches/client" 14 k8serrors "k8s.io/apimachinery/pkg/api/errors" 15 "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" 16 "k8s.io/apimachinery/pkg/types" 17 clusterv1beta1 "open-cluster-management.io/api/cluster/v1beta1" 18 appsv1 "open-cluster-management.io/multicloud-operators-subscription/pkg/apis/apps/placementrule/v1" 19 ctrl "sigs.k8s.io/controller-runtime" 20 "sigs.k8s.io/controller-runtime/pkg/client" 21 "sigs.k8s.io/controller-runtime/pkg/reconcile" 22 23 policiesv1 "open-cluster-management.io/governance-policy-propagator/api/v1" 24 "open-cluster-management.io/governance-policy-propagator/controllers/common" 25 "open-cluster-management.io/governance-policy-propagator/controllers/complianceeventsapi" 26 ) 27 28 const ( 29 ParentPolicyIDAnnotation = "policy.open-cluster-management.io/parent-policy-compliance-db-id" 30 PolicyIDAnnotation = "policy.open-cluster-management.io/policy-compliance-db-id" 31 ) 32 33 var _ reconcile.Reconciler = &ReplicatedPolicyReconciler{} 34 35 type ReplicatedPolicyReconciler struct { 36 Propagator 37 ResourceVersions *sync.Map 38 DynamicWatcher k8sdepwatches.DynamicWatcher 39 TemplateResolver *templates.TemplateResolver 40 ComplianceServerCtx *complianceeventsapi.ComplianceServerCtx 41 } 42 43 func (r *ReplicatedPolicyReconciler) Reconcile(ctx context.Context, request ctrl.Request) (ctrl.Result, error) { 44 log := log.WithValues("Request.Namespace", request.Namespace, "Request.Name", request.Name) 45 log.Info("Reconciling the replicated policy") 46 47 // Set the hub template watch metric after reconcile 48 defer func() { 49 hubTempWatches := r.TemplateResolver.GetWatchCount() 50 log.V(3).Info("Setting hub template watch metric", "value", hubTempWatches) 51 52 hubTemplateActiveWatchesMetric.Set(float64(hubTempWatches)) 53 }() 54 55 replicatedExists := true 56 replicatedPolicy := &policiesv1.Policy{} 57 58 if err := r.Get(ctx, request.NamespacedName, replicatedPolicy); err != nil { 59 if !k8serrors.IsNotFound(err) { 60 log.Error(err, "Failed to get the replicated policy") 61 62 return reconcile.Result{}, err 63 } 64 65 replicatedExists = false 66 } 67 68 rootName, rootNS, err := common.ParseRootPolicyLabel(request.Name) 69 if err != nil { 70 if !replicatedExists { 71 log.Error(err, "Invalid replicated policy sent for reconcile, rejecting") 72 73 return reconcile.Result{}, nil 74 } 75 76 cleanUpErr := r.cleanUpReplicated(ctx, replicatedPolicy) 77 if cleanUpErr != nil && !k8serrors.IsNotFound(cleanUpErr) { 78 log.Error(err, "Failed to delete the invalid replicated policy, requeueing") 79 80 return reconcile.Result{}, err 81 } 82 83 log.Info("Invalid replicated policy deleted") 84 85 return reconcile.Result{}, nil 86 } 87 88 rsrcVersKey := request.Namespace + "/" + request.Name 89 90 // Fetch the Root Policy instance 91 rootPolicy := &policiesv1.Policy{} 92 rootNN := types.NamespacedName{Namespace: rootNS, Name: rootName} 93 94 if err := r.Get(ctx, rootNN, rootPolicy); err != nil { 95 if !k8serrors.IsNotFound(err) { 96 log.Error(err, "Failed to get the root policy, requeueing") 97 98 return reconcile.Result{}, err 99 } 100 101 if !replicatedExists { 102 version := safeWriteLoad(r.ResourceVersions, rsrcVersKey) 103 defer version.Unlock() 104 105 // Store this to ensure the cache matches a known possible state for this situation 106 version.resourceVersion = "deleted" 107 108 log.V(1).Info("Root policy and replicated policy already missing") 109 110 return reconcile.Result{}, nil 111 } 112 113 inClusterNS, err := common.IsInClusterNamespace(ctx, r.Client, request.Namespace) 114 if err != nil { 115 return reconcile.Result{}, err 116 } 117 118 if !inClusterNS { 119 // "Hub of hubs" scenario: this cluster is managed by another cluster, 120 // which has the root policy for the policy being reconciled. 121 log.V(1).Info("Found a replicated policy in non-cluster namespace, skipping it") 122 123 return reconcile.Result{}, nil 124 } 125 126 // otherwise, we need to clean it up 127 if err := r.cleanUpReplicated(ctx, replicatedPolicy); err != nil { 128 if !k8serrors.IsNotFound(err) { 129 log.Error(err, "Failed to delete the orphaned replicated policy, requeueing") 130 131 return reconcile.Result{}, err 132 } 133 } 134 135 log.Info("Orphaned replicated policy deleted") 136 137 return reconcile.Result{}, nil 138 } 139 140 if rootPolicy.Spec.Disabled { 141 if replicatedExists { 142 if err := r.cleanUpReplicated(ctx, replicatedPolicy); err != nil { 143 if !k8serrors.IsNotFound(err) { 144 log.Error(err, "Failed to delete the disabled replicated policy, requeueing") 145 146 return reconcile.Result{}, err 147 } 148 } 149 150 log.Info("Disabled replicated policy deleted") 151 152 return reconcile.Result{}, nil 153 } 154 155 version := safeWriteLoad(r.ResourceVersions, rsrcVersKey) 156 defer version.Unlock() 157 158 // Store this to ensure the cache matches a known possible state for this situation 159 version.resourceVersion = "deleted" 160 161 log.V(1).Info("Root policy is disabled, and replicated policy correctly not found.") 162 163 return reconcile.Result{}, nil 164 } 165 166 // calculate the decision for this specific cluster 167 decision, err := r.singleClusterDecision(ctx, rootPolicy, request.Namespace) 168 if err != nil { 169 log.Error(err, "Failed to determine if policy should be replicated, requeueing") 170 171 return reconcile.Result{}, err 172 } 173 174 // an empty decision means the policy should not be replicated 175 if decision.Cluster == "" { 176 if replicatedExists { 177 inClusterNS, err := common.IsInClusterNamespace(ctx, r.Client, request.Namespace) 178 if err != nil { 179 return reconcile.Result{}, err 180 } 181 182 if !inClusterNS { 183 // "Hosted mode" scenario: this cluster is hosting another cluster, which is syncing 184 // this policy to a cluster namespace that this propagator doesn't know about. 185 log.V(1).Info("Found a possible replicated policy for a hosted cluster, skipping it") 186 187 return reconcile.Result{}, nil 188 } 189 190 if err := r.cleanUpReplicated(ctx, replicatedPolicy); err != nil { 191 if !k8serrors.IsNotFound(err) { 192 log.Error(err, "Failed to remove the replicated policy for this managed cluster, requeueing") 193 194 return reconcile.Result{}, err 195 } 196 } 197 198 log.Info("Removed replicated policy from managed cluster") 199 200 return reconcile.Result{}, nil 201 } 202 203 version := safeWriteLoad(r.ResourceVersions, rsrcVersKey) 204 defer version.Unlock() 205 206 // Store this to ensure the cache matches a known possible state for this situation 207 version.resourceVersion = "deleted" 208 209 log.V(1).Info("Replicated policy should not exist on this managed cluster, and does not.") 210 211 return reconcile.Result{}, nil 212 } 213 214 objsToWatch := getPolicySetDependencies(rootPolicy) 215 216 desiredReplicatedPolicy, err := r.buildReplicatedPolicy(ctx, rootPolicy, decision) 217 if err != nil { 218 log.Error(err, "Unable to build desired replicated policy, requeueing") 219 220 return reconcile.Result{}, err 221 } 222 223 instanceGVK := desiredReplicatedPolicy.GroupVersionKind() 224 instanceObjID := k8sdepwatches.ObjectIdentifier{ 225 Group: instanceGVK.Group, 226 Version: instanceGVK.Version, 227 Kind: instanceGVK.Kind, 228 Namespace: request.Namespace, 229 Name: request.Name, 230 } 231 232 // save the watcherError for later, so that the policy can still be updated now. 233 var watcherErr error 234 235 if policyHasTemplates(rootPolicy) { 236 if replicatedExists { 237 // If the replicated policy has an initialization vector specified, set it for processing 238 if initializationVector, ok := replicatedPolicy.Annotations[IVAnnotation]; ok { 239 tempAnnotations := desiredReplicatedPolicy.GetAnnotations() 240 if tempAnnotations == nil { 241 tempAnnotations = make(map[string]string) 242 } 243 244 tempAnnotations[IVAnnotation] = initializationVector 245 246 desiredReplicatedPolicy.SetAnnotations(tempAnnotations) 247 } 248 } 249 250 // Any errors to expose to the user are logged and recorded in the processTemplates method. Only retry 251 // the request if it's determined to be a retryable error (i.e. don't retry syntax errors). 252 err := r.processTemplates(ctx, desiredReplicatedPolicy, decision.Cluster, rootPolicy) 253 if errors.Is(err, ErrRetryable) { 254 // Return the error if it's retryable, which will utilize controller-runtime's exponential backoff. 255 return reconcile.Result{}, err 256 } 257 } else { 258 watcherErr := r.TemplateResolver.UncacheWatcher(instanceObjID) 259 if watcherErr != nil { 260 log.Error(watcherErr, "Failed to uncache objects related to the replicated policy's templates") 261 } 262 } 263 264 r.setDBAnnotations(ctx, rootPolicy, desiredReplicatedPolicy, replicatedPolicy) 265 266 if len(objsToWatch) != 0 { 267 refObjs := make([]k8sdepwatches.ObjectIdentifier, 0, len(objsToWatch)) 268 for objToWatch := range objsToWatch { 269 refObjs = append(refObjs, objToWatch) 270 } 271 272 watcherErr = r.DynamicWatcher.AddOrUpdateWatcher(instanceObjID, refObjs...) 273 if watcherErr != nil { 274 log.Error(watcherErr, "Failed to update the dynamic watches for the policy set dependencies") 275 } 276 } else { 277 watcherErr = r.DynamicWatcher.RemoveWatcher(instanceObjID) 278 if watcherErr != nil { 279 log.Error(watcherErr, "Failed to remove the dynamic watches for the hub policy templates") 280 } 281 } 282 283 if !replicatedExists { 284 version := safeWriteLoad(r.ResourceVersions, rsrcVersKey) 285 defer version.Unlock() 286 287 err = r.Create(ctx, desiredReplicatedPolicy) 288 if err != nil { 289 log.Error(err, "Failed to create the replicated policy, requeueing") 290 291 return reconcile.Result{}, err 292 } 293 294 r.Recorder.Event(rootPolicy, "Normal", "PolicyPropagation", 295 fmt.Sprintf("Policy %s/%s was propagated to cluster %s", rootPolicy.GetNamespace(), 296 rootPolicy.GetName(), decision.Cluster)) 297 298 version.resourceVersion = desiredReplicatedPolicy.GetResourceVersion() 299 300 log.Info("Created replicated policy") 301 302 return reconcile.Result{}, watcherErr 303 } 304 305 version := safeWriteLoad(r.ResourceVersions, rsrcVersKey) 306 defer version.Unlock() 307 308 // replicated policy already created, need to compare and possibly update 309 if !equivalentReplicatedPolicies(desiredReplicatedPolicy, replicatedPolicy) { 310 replicatedPolicy.SetAnnotations(desiredReplicatedPolicy.GetAnnotations()) 311 replicatedPolicy.SetLabels(desiredReplicatedPolicy.GetLabels()) 312 replicatedPolicy.Spec = desiredReplicatedPolicy.Spec 313 314 err = r.Update(ctx, replicatedPolicy) 315 if err != nil { 316 log.Error(err, "Failed to update the replicated policy, requeueing") 317 318 return reconcile.Result{}, err 319 } 320 321 r.Recorder.Event(rootPolicy, "Normal", "PolicyPropagation", 322 fmt.Sprintf("Policy %s/%s was updated for cluster %s", rootPolicy.GetNamespace(), 323 rootPolicy.GetName(), decision.Cluster)) 324 325 log.Info("Replicated policy updated") 326 } else { 327 log.Info("Replicated policy matches, no update needed") 328 } 329 330 // whether it was updated or not, this resourceVersion can be cached 331 version.resourceVersion = replicatedPolicy.GetResourceVersion() 332 333 if watcherErr != nil { 334 log.Info("Requeueing for the dynamic watcher error") 335 } 336 337 return reconcile.Result{}, watcherErr 338 } 339 340 // getParentPolicyID needs to have the caller call r.ComplianceServerCtx.Lock.RLock. 341 func (r *ReplicatedPolicyReconciler) getParentPolicyID( 342 ctx context.Context, 343 rootPolicy *policiesv1.Policy, 344 existingReplicatedPolicy *policiesv1.Policy, 345 ) (int32, error) { 346 dbParentPolicy := complianceeventsapi.ParentPolicyFromPolicyObj(rootPolicy) 347 348 // Check the cache first. 349 cachedParentPolicyID, ok := r.ComplianceServerCtx.ParentPolicyToID.Load(dbParentPolicy.Key()) 350 if ok { 351 return cachedParentPolicyID.(int32), nil 352 } 353 354 // Try the database second before checking the replicated policy to be able to recover if the compliance history 355 // database is restored from backup and the IDs on the replicated policy no longer exist. 356 var dbErr error 357 358 if r.ComplianceServerCtx.DB != nil { 359 err := dbParentPolicy.GetOrCreate(ctx, r.ComplianceServerCtx.DB) 360 if err == nil { 361 r.ComplianceServerCtx.ParentPolicyToID.Store(dbParentPolicy.Key(), dbParentPolicy.KeyID) 362 363 return dbParentPolicy.KeyID, nil 364 } 365 366 if r.ComplianceServerCtx.DB.PingContext(ctx) != nil { 367 dbErr = complianceeventsapi.ErrDBConnectionFailed 368 } else { 369 dbErr = fmt.Errorf("%w: failed to get the database ID of the parent policy", err) 370 } 371 } else { 372 dbErr = complianceeventsapi.ErrDBConnectionFailed 373 } 374 375 // Check if the existing replicated policy already has the annotation set and has the same 376 // categories, controls, and standards as the current root policy. 377 var parentPolicyIDFromRepl string 378 if existingReplicatedPolicy != nil { 379 parentPolicyIDFromRepl = existingReplicatedPolicy.Annotations[ParentPolicyIDAnnotation] 380 } 381 382 if parentPolicyIDFromRepl != "" { 383 dbParentPolicyFromRepl := complianceeventsapi.ParentPolicyFromPolicyObj(existingReplicatedPolicy) 384 dbParentPolicyFromRepl.Name = rootPolicy.Name 385 dbParentPolicyFromRepl.Namespace = rootPolicy.Namespace 386 387 if dbParentPolicy.Key() == dbParentPolicyFromRepl.Key() { 388 parentPolicyID, err := strconv.ParseInt(parentPolicyIDFromRepl, 10, 32) 389 if err == nil && parentPolicyID != 0 { 390 r.ComplianceServerCtx.ParentPolicyToID.Store(dbParentPolicy.Key(), int32(parentPolicyID)) 391 392 return int32(parentPolicyID), nil 393 } 394 } 395 } 396 397 return 0, dbErr 398 } 399 400 // getPolicyID needs to have the caller call r.ComplianceServerCtx.Lock.RLock. 401 func (r *ReplicatedPolicyReconciler) getPolicyID( 402 ctx context.Context, 403 replPolicy *policiesv1.Policy, 404 existingReplPolicy *policiesv1.Policy, 405 replTemplateIdx int, 406 skipDB bool, 407 ) (int32, *unstructured.Unstructured, error) { 408 // Start by checking the cache. 409 plcTemplate := replPolicy.Spec.PolicyTemplates[replTemplateIdx] 410 plcTmplUnstruct := &unstructured.Unstructured{} 411 412 err := plcTmplUnstruct.UnmarshalJSON(plcTemplate.ObjectDefinition.Raw) 413 if err != nil { 414 return 0, plcTmplUnstruct, err 415 } 416 417 dbPolicy := complianceeventsapi.PolicyFromUnstructured(*plcTmplUnstruct) 418 if err := dbPolicy.Validate(); err != nil { 419 return 0, plcTmplUnstruct, err 420 } 421 422 var policyID int32 423 424 cachedPolicyID, ok := r.ComplianceServerCtx.PolicyToID.Load(dbPolicy.Key()) 425 if ok { 426 policyID = cachedPolicyID.(int32) 427 428 return policyID, plcTmplUnstruct, nil 429 } 430 431 // Try the database second before checking the replicated policy to be able to recover if the compliance history 432 // database is restored from backup and the IDs on the replicated policy no longer exist. 433 var dbErr error 434 if skipDB || r.ComplianceServerCtx.DB == nil { 435 dbErr = complianceeventsapi.ErrDBConnectionFailed 436 } else { 437 err = dbPolicy.GetOrCreate(ctx, r.ComplianceServerCtx.DB) 438 if err == nil { 439 r.ComplianceServerCtx.PolicyToID.Store(dbPolicy.Key(), dbPolicy.KeyID) 440 441 return dbPolicy.KeyID, plcTmplUnstruct, nil 442 } 443 444 dbErr = err 445 } 446 447 // Check if the existing policy template matches the existing one 448 var existingPlcTemplate *policiesv1.PolicyTemplate 449 450 existingPlcTmplUnstruct := unstructured.Unstructured{} 451 452 var existingAnnotation string 453 var existingDBPolicy *complianceeventsapi.Policy 454 455 // Try the existing policy template first before trying the database. 456 if existingReplPolicy != nil && len(existingReplPolicy.Spec.PolicyTemplates) >= replTemplateIdx+1 { 457 existingPlcTemplate = existingReplPolicy.Spec.PolicyTemplates[replTemplateIdx] 458 459 err = existingPlcTmplUnstruct.UnmarshalJSON(existingPlcTemplate.ObjectDefinition.Raw) 460 if err == nil { 461 existingAnnotations := existingPlcTmplUnstruct.GetAnnotations() 462 existingAnnotation = existingAnnotations[PolicyIDAnnotation] 463 464 if existingAnnotation != "" { 465 existingDBPolicy = complianceeventsapi.PolicyFromUnstructured(existingPlcTmplUnstruct) 466 } 467 } 468 } 469 470 // This is a continuation from the above if statement but this was broken up here to make it less indented. 471 if existingAnnotation != "" { 472 if err := existingDBPolicy.Validate(); err == nil { 473 if dbPolicy.Key() == existingDBPolicy.Key() { 474 policyID, err := strconv.ParseInt(existingAnnotation, 10, 32) 475 if err == nil && policyID != 0 { 476 r.ComplianceServerCtx.PolicyToID.Store(dbPolicy.Key(), int32(policyID)) 477 478 return int32(policyID), plcTmplUnstruct, nil 479 } 480 } 481 } 482 } 483 484 return 0, plcTmplUnstruct, dbErr 485 } 486 487 // setDBAnnotations sets the parent policy ID on the replicated policy and the policy ID for each policy template. 488 // If the DB connection is unavailable, it queues up a reconcile for when the DB becomes available. 489 func (r *ReplicatedPolicyReconciler) setDBAnnotations( 490 ctx context.Context, 491 rootPolicy *policiesv1.Policy, 492 replicatedPolicy *policiesv1.Policy, 493 existingReplicatedPolicy *policiesv1.Policy, 494 ) { 495 r.ComplianceServerCtx.Lock.RLock() 496 defer r.ComplianceServerCtx.Lock.RUnlock() 497 498 // Assume the database is connected unless told otherwise. 499 dbAvailable := true 500 var requeueForDB bool 501 502 annotations := replicatedPolicy.GetAnnotations() 503 if annotations == nil { 504 annotations = map[string]string{} 505 } 506 507 parentPolicyID, err := r.getParentPolicyID(ctx, rootPolicy, existingReplicatedPolicy) 508 if err != nil { 509 if errors.Is(err, complianceeventsapi.ErrDBConnectionFailed) { 510 dbAvailable = false 511 } else { 512 log.Error( 513 err, "Failed to get the parent policy ID", "name", rootPolicy.Name, "namespace", rootPolicy.Namespace, 514 ) 515 } 516 517 requeueForDB = true 518 519 // Remove it if the user accidentally provided the annotation 520 if annotations[ParentPolicyIDAnnotation] != "" { 521 delete(annotations, PolicyIDAnnotation) 522 replicatedPolicy.SetAnnotations(annotations) 523 } 524 } else { 525 annotations[ParentPolicyIDAnnotation] = strconv.FormatInt(int64(parentPolicyID), 10) 526 replicatedPolicy.SetAnnotations(annotations) 527 } 528 529 for i, plcTemplate := range replicatedPolicy.Spec.PolicyTemplates { 530 if plcTemplate == nil { 531 continue 532 } 533 534 policyID, plcTmplUnstruct, err := r.getPolicyID( 535 ctx, replicatedPolicy, existingReplicatedPolicy, i, !dbAvailable, 536 ) 537 if err != nil { 538 if errors.Is(err, complianceeventsapi.ErrDBConnectionFailed) { 539 dbAvailable = false 540 } else { 541 log.Error( 542 err, 543 "Failed to get the policy ID for the policy template", 544 "name", plcTmplUnstruct.GetName(), 545 "namespace", plcTmplUnstruct.GetNamespace(), 546 "index", i, 547 ) 548 } 549 550 requeueForDB = true 551 tmplAnnotations := plcTmplUnstruct.GetAnnotations() 552 553 if tmplAnnotations[PolicyIDAnnotation] == "" { 554 continue 555 } 556 557 // Remove it if the user accidentally provided the annotation 558 delete(tmplAnnotations, PolicyIDAnnotation) 559 plcTmplUnstruct.SetAnnotations(tmplAnnotations) 560 } else { 561 tmplAnnotations := plcTmplUnstruct.GetAnnotations() 562 if tmplAnnotations == nil { 563 tmplAnnotations = map[string]string{} 564 } 565 566 tmplAnnotations[PolicyIDAnnotation] = strconv.FormatInt(int64(policyID), 10) 567 plcTmplUnstruct.SetAnnotations(tmplAnnotations) 568 } 569 570 updatedTemplate, err := plcTmplUnstruct.MarshalJSON() 571 if err != nil { 572 log.Error( 573 err, "Failed to set the annotation on the policy template", "index", i, "anotation", PolicyIDAnnotation, 574 ) 575 576 continue 577 } 578 579 replicatedPolicy.Spec.PolicyTemplates[i].ObjectDefinition.Raw = updatedTemplate 580 } 581 582 if requeueForDB { 583 log.V(2).Info( 584 "The compliance events database is not available. Queuing this replicated policy to be reprocessed if the "+ 585 "database becomes available.", 586 "namespace", replicatedPolicy.Namespace, 587 "name", replicatedPolicy.Name, 588 ) 589 r.ComplianceServerCtx.Queue.Add( 590 types.NamespacedName{Namespace: replicatedPolicy.Namespace, Name: replicatedPolicy.Name}, 591 ) 592 } 593 } 594 595 func (r *ReplicatedPolicyReconciler) cleanUpReplicated(ctx context.Context, replicatedPolicy *policiesv1.Policy) error { 596 gvk := replicatedPolicy.GroupVersionKind() 597 598 objID := k8sdepwatches.ObjectIdentifier{ 599 Group: gvk.Group, 600 Version: gvk.Version, 601 Kind: gvk.Kind, 602 Namespace: replicatedPolicy.Namespace, 603 Name: replicatedPolicy.Name, 604 } 605 606 watcherErr := r.DynamicWatcher.RemoveWatcher(objID) 607 608 uncacheErr := r.TemplateResolver.UncacheWatcher(objID) 609 if uncacheErr != nil { 610 if watcherErr == nil { 611 watcherErr = uncacheErr 612 } else { 613 watcherErr = fmt.Errorf("%w; %w", watcherErr, uncacheErr) 614 } 615 } 616 617 rsrcVersKey := replicatedPolicy.GetNamespace() + "/" + replicatedPolicy.GetName() 618 619 version := safeWriteLoad(r.ResourceVersions, rsrcVersKey) 620 defer version.Unlock() 621 622 deleteErr := r.Delete(ctx, replicatedPolicy) 623 624 if deleteErr != nil { 625 if k8serrors.IsNotFound(deleteErr) { 626 version.resourceVersion = "deleted" 627 } 628 } else { 629 version.resourceVersion = "deleted" 630 631 // Normally the spec-sync controller handles this, however, if it's a self-managed hub policy, the spec-sync 632 // controller does not run. So this is a special case. 633 if replicatedPolicy.Namespace == "local-cluster" && r.ComplianceServerCtx.DB != nil { 634 r.recordDisabledEvents(ctx, replicatedPolicy) 635 } 636 } 637 638 return errors.Join(watcherErr, deleteErr) 639 } 640 641 // recordDisabledEvents will generate and record disabled compliance events in the compliance history database for each 642 // policy template. On retryable errors, the generated compliance event is added to the ComplianceServerCtx queue 643 // for MonitorDatabaseConnection to handle once the database is back up. 644 func (r *ReplicatedPolicyReconciler) recordDisabledEvents( 645 ctx context.Context, replicatedPolicy *policiesv1.Policy, 646 ) { 647 log := log.WithValues("policy", replicatedPolicy.Name) 648 649 if replicatedPolicy.Annotations[ParentPolicyIDAnnotation] == "" { 650 return 651 } 652 653 parentPolicyID, err := strconv.ParseInt(replicatedPolicy.Annotations[ParentPolicyIDAnnotation], 10, 32) 654 if err != nil { 655 log.Error(err, "Failed to record a disabled compliance event due to an invalid parent policy ID") 656 657 return 658 } 659 660 dbConnectionDown := false 661 662 for _, template := range replicatedPolicy.Spec.PolicyTemplates { 663 plcTmplUnstruct := &unstructured.Unstructured{} 664 665 err := plcTmplUnstruct.UnmarshalJSON(template.ObjectDefinition.Raw) 666 if err != nil { 667 continue 668 } 669 670 policyIDStr := plcTmplUnstruct.GetAnnotations()[PolicyIDAnnotation] 671 if policyIDStr == "" { 672 continue 673 } 674 675 policyID, err := strconv.ParseInt(policyIDStr, 10, 32) 676 if err != nil { 677 log.Error(err, "Failed to record a disabled compliance event due to an invalid policy ID") 678 679 continue 680 } 681 682 complianceEvent := &complianceeventsapi.EventDetailsQueued{ 683 ParentPolicyID: int32(parentPolicyID), 684 PolicyID: int32(policyID), 685 Compliance: "Disabled", 686 Message: "The policy was removed because the parent policy no longer applies to this cluster", 687 Timestamp: time.Now().UTC(), 688 ReportedBy: "governance-policy-framework", 689 } 690 691 if dbConnectionDown { 692 log.Info( 693 "Failed to record the compliance event. Will requeue.", 694 "error", complianceeventsapi.ErrDBConnectionFailed.Error(), 695 "eventMessage", complianceEvent.Message, 696 "policyID", complianceEvent.PolicyID, 697 ) 698 699 r.ComplianceServerCtx.Queue.Add(complianceEvent) 700 701 continue 702 } 703 704 err = complianceeventsapi.RecordLocalClusterComplianceEvent( 705 ctx, r.ComplianceServerCtx, complianceEvent.EventDetails(), 706 ) 707 708 requeue := errors.Is(err, complianceeventsapi.ErrRetryable) 709 if requeue { 710 r.ComplianceServerCtx.Queue.Add(complianceEvent) 711 } 712 713 if errors.Is(err, complianceeventsapi.ErrDBConnectionFailed) { 714 dbConnectionDown = true 715 } 716 717 if err != nil { 718 log.Info( 719 "Failed to record the compliance event", 720 "requeue", requeue, 721 "error", err.Error(), 722 "eventMessage", complianceEvent.Message, 723 "policyID", complianceEvent.PolicyID, 724 ) 725 } else { 726 log.V(2).Info( 727 "Recorded the compliance event", 728 "eventMessage", complianceEvent.Message, 729 "policyID", complianceEvent.PolicyID, 730 ) 731 } 732 } 733 } 734 735 func (r *ReplicatedPolicyReconciler) singleClusterDecision( 736 ctx context.Context, rootPlc *policiesv1.Policy, clusterName string, 737 ) (decision clusterDecision, err error) { 738 positiveDecision := clusterDecision{ 739 Cluster: clusterName, 740 } 741 742 pbList := &policiesv1.PlacementBindingList{} 743 744 err = r.List(ctx, pbList, &client.ListOptions{Namespace: rootPlc.GetNamespace()}) 745 if err != nil { 746 return clusterDecision{}, err 747 } 748 749 foundWithoutSubFilter := false 750 751 // Process all placement bindings without subFilter 752 for i, pb := range pbList.Items { 753 if pb.SubFilter == policiesv1.Restricted { 754 continue 755 } 756 757 found, err := r.isSingleClusterInDecisions(ctx, &pbList.Items[i], rootPlc.GetName(), clusterName) 758 if err != nil { 759 return clusterDecision{}, err 760 } 761 762 if !found { 763 continue 764 } 765 766 if strings.EqualFold(pb.BindingOverrides.RemediationAction, string(policiesv1.Enforce)) { 767 positiveDecision.PolicyOverrides = pb.BindingOverrides 768 // If an override is found, then no other decisions can currently change this result. 769 // NOTE: if additional overrides are added in the future, this will additional logic. 770 return positiveDecision, nil 771 } 772 773 foundWithoutSubFilter = true 774 } 775 776 if !foundWithoutSubFilter { 777 // No need to look through the subFilter bindings. 778 return clusterDecision{}, nil 779 } 780 781 // Process all placement bindings with subFilter 782 for i, pb := range pbList.Items { 783 if pb.SubFilter != policiesv1.Restricted { 784 continue 785 } 786 787 found, err := r.isSingleClusterInDecisions(ctx, &pbList.Items[i], rootPlc.GetName(), clusterName) 788 if err != nil { 789 return clusterDecision{}, err 790 } 791 792 if !found { 793 continue 794 } 795 796 if strings.EqualFold(pb.BindingOverrides.RemediationAction, string(policiesv1.Enforce)) { 797 positiveDecision.PolicyOverrides = pb.BindingOverrides 798 // If an override is found, then no other decisions can currently change this result. 799 // NOTE: if additional overrides are added in the future, this will additional logic. 800 return positiveDecision, nil 801 } 802 } 803 804 // None of the bindings had any overrides. 805 return positiveDecision, nil 806 } 807 808 func (r *ReplicatedPolicyReconciler) isSingleClusterInDecisions( 809 ctx context.Context, pb *policiesv1.PlacementBinding, policyName, clusterName string, 810 ) (found bool, err error) { 811 if !common.HasValidPlacementRef(pb) { 812 return false, nil 813 } 814 815 subjectFound := false 816 817 for _, subject := range pb.Subjects { 818 if subject.APIGroup != policiesv1.SchemeGroupVersion.Group { 819 continue 820 } 821 822 switch subject.Kind { 823 case policiesv1.Kind: 824 if subject.Name == policyName { 825 subjectFound = true 826 } 827 case policiesv1.PolicySetKind: 828 if common.IsPolicyInPolicySet(ctx, r.Client, policyName, subject.Name, pb.GetNamespace()) { 829 subjectFound = true 830 } 831 } 832 833 if subjectFound { 834 break 835 } 836 } 837 838 if !subjectFound { 839 return false, nil 840 } 841 842 refNN := types.NamespacedName{ 843 Namespace: pb.GetNamespace(), 844 Name: pb.PlacementRef.Name, 845 } 846 847 switch pb.PlacementRef.Kind { 848 case "PlacementRule": 849 plr := &appsv1.PlacementRule{} 850 if err := r.Get(ctx, refNN, plr); err != nil && !k8serrors.IsNotFound(err) { 851 return false, fmt.Errorf("failed to get PlacementRule '%v': %w", pb.PlacementRef.Name, err) 852 } 853 854 for _, decision := range plr.Status.Decisions { 855 if decision.ClusterName == clusterName { 856 return true, nil 857 } 858 } 859 case "Placement": 860 pl := &clusterv1beta1.Placement{} 861 if err := r.Get(ctx, refNN, pl); err != nil && !k8serrors.IsNotFound(err) { 862 return false, fmt.Errorf("failed to get Placement '%v': %w", pb.PlacementRef.Name, err) 863 } 864 865 if k8serrors.IsNotFound(err) { 866 return false, nil 867 } 868 869 list := &clusterv1beta1.PlacementDecisionList{} 870 lopts := &client.ListOptions{Namespace: pb.GetNamespace()} 871 872 opts := client.MatchingLabels{"cluster.open-cluster-management.io/placement": pl.GetName()} 873 opts.ApplyToList(lopts) 874 875 err = r.List(ctx, list, lopts) 876 if err != nil && !k8serrors.IsNotFound(err) { 877 return false, fmt.Errorf("failed to list the PlacementDecisions for '%v', %w", pb.PlacementRef.Name, err) 878 } 879 880 for _, item := range list.Items { 881 for _, cluster := range item.Status.Decisions { 882 if cluster.ClusterName == clusterName { 883 return true, nil 884 } 885 } 886 } 887 } 888 889 return false, nil 890 }