// Source: open-cluster-management.io/governance-policy-propagator@v0.13.0/controllers/propagator/propagation.go

// Copyright (c) 2021 Red Hat, Inc.
// Copyright Contributors to the Open Cluster Management project

package propagator

import (
	"context"
	"errors"
	"fmt"
	"strconv"
	"strings"
	"sync"
	"time"

	templates "github.com/stolostron/go-template-utils/v4/pkg/templates"
	k8sdepwatches "github.com/stolostron/kubernetes-dependency-watches/client"
	k8serrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/runtime/schema"
	"k8s.io/apimachinery/pkg/util/json"
	"k8s.io/client-go/tools/record"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/event"

	policiesv1 "open-cluster-management.io/governance-policy-propagator/api/v1"
	"open-cluster-management.io/governance-policy-propagator/controllers/common"
)

const (
	// TemplateStartDelim is the opening delimiter of a hub template. In this file it is passed to
	// templates.HasTemplate and templates.UsesEncryption to detect hub templates in an object
	// definition.
	TemplateStartDelim = "{{hub"
	// TemplateStopDelim is the closing delimiter of a hub template. In this file it is passed to
	// templates.UsesEncryption together with TemplateStartDelim.
	TemplateStopDelim = "hub}}"
	// TriggerUpdateAnnotation is the annotation key that processTemplates removes from replicated
	// policies; per the processTemplates comment it is set on the root policy to trigger
	// reprocessing of the templates.
	TriggerUpdateAnnotation = "policy.open-cluster-management.io/trigger-update"
)

// ErrRetryable is a sentinel that processTemplates wraps (with %w) around errors it considers
// worth retrying. Its message is intentionally empty, so wrapping it does not change the text of
// the underlying error.
// NOTE(review): presumably callers detect it with errors.Is to requeue - confirm at the call sites.
var ErrRetryable = errors.New("")

// Propagator bundles the Kubernetes client, scheme, and event recorder together with the
// root policy lock map and the channel of replicated policy update events.
// NOTE(review): the methods in this file use RootPolicyReconciler and ReplicatedPolicyReconciler
// receivers that are declared elsewhere; presumably those types embed Propagator - confirm.
type Propagator struct {
	client.Client
	Scheme   *runtime.Scheme
	Recorder record.EventRecorder
	// RootPolicyLocks is not used in this file.
	// NOTE(review): presumably holds one lock per root policy - verify against its users.
	RootPolicyLocks *sync.Map
	// ReplicatedPolicyUpdates carries generic events; the root policy handling in this file
	// sends one event per replicated policy that should be reconciled.
	ReplicatedPolicyUpdates chan event.GenericEvent
}

// clusterDecision contains a single decision where the replicated policy
// should be processed and any overrides to the root policy
type clusterDecision struct {
	Cluster         string
	PolicyOverrides policiesv1.BindingOverrides
}

// cleanUpOrphanedRplPolicies compares the status of the input policy against the input placement
// decisions.
If the cluster exists in the status but doesn't exist in the input placement 56 // decisions, then it's considered stale and an event is sent to the replicated policy reconciler 57 // so the policy will be removed. 58 func (r *RootPolicyReconciler) cleanUpOrphanedRplPolicies( 59 instance *policiesv1.Policy, originalCPCS []*policiesv1.CompliancePerClusterStatus, allDecisions common.DecisionSet, 60 ) error { 61 log := log.WithValues("policyName", instance.GetName(), "policyNamespace", instance.GetNamespace()) 62 63 for _, cluster := range originalCPCS { 64 if allDecisions[cluster.ClusterName] { 65 continue 66 } 67 68 // not found in allDecisions, orphan, send an event for it to delete itself 69 simpleObj := &common.GuttedObject{ 70 TypeMeta: metav1.TypeMeta{ 71 Kind: policiesv1.Kind, 72 APIVersion: policiesv1.GroupVersion.String(), 73 }, 74 ObjectMeta: metav1.ObjectMeta{ 75 Name: common.FullNameForPolicy(instance), 76 Namespace: cluster.ClusterName, 77 }, 78 } 79 80 log.V(2).Info("Sending reconcile for replicated policy", "replicatedPolicyName", simpleObj.GetName()) 81 82 r.ReplicatedPolicyUpdates <- event.GenericEvent{Object: simpleObj} 83 } 84 85 return nil 86 } 87 88 // handleRootPolicy will properly replicate or clean up when a root policy is updated. 
89 func (r *RootPolicyReconciler) handleRootPolicy(ctx context.Context, instance *policiesv1.Policy) error { 90 // Generate a metric for elapsed handling time for each policy 91 entryTS := time.Now() 92 defer func() { 93 now := time.Now() 94 elapsed := now.Sub(entryTS).Seconds() 95 roothandlerMeasure.Observe(elapsed) 96 }() 97 98 log := log.WithValues("policyName", instance.GetName(), "policyNamespace", instance.GetNamespace()) 99 100 // Clean up the replicated policies if the policy is disabled 101 if instance.Spec.Disabled { 102 log.Info("The policy is disabled, doing clean up") 103 104 updateCount, err := r.updateExistingReplicas(ctx, instance.Namespace+"."+instance.Name) 105 if err != nil { 106 return err 107 } 108 109 // Checks if replicated policies exist in the event that 110 // a double reconcile to prevent emitting the same event twice 111 if updateCount > 0 { 112 r.Recorder.Event(instance, "Normal", "PolicyPropagation", 113 fmt.Sprintf("Policy %s/%s was disabled", instance.GetNamespace(), instance.GetName())) 114 } 115 } 116 117 // make a copy of the original status 118 originalCPCS := make([]*policiesv1.CompliancePerClusterStatus, len(instance.Status.Status)) 119 copy(originalCPCS, instance.Status.Status) 120 121 decisions, err := common.RootStatusUpdate(ctx, r.Client, instance) 122 if err != nil { 123 return err 124 } 125 126 log.Info("Sending reconcile events to replicated policies", "decisionsCount", len(decisions)) 127 128 for decision := range decisions { 129 simpleObj := &common.GuttedObject{ 130 TypeMeta: metav1.TypeMeta{ 131 Kind: policiesv1.Kind, 132 APIVersion: policiesv1.GroupVersion.String(), 133 }, 134 ObjectMeta: metav1.ObjectMeta{ 135 Name: common.FullNameForPolicy(instance), 136 Namespace: decision, 137 }, 138 } 139 140 log.V(2).Info("Sending reconcile for replicated policy", "replicatedPolicyName", simpleObj.GetName()) 141 142 r.ReplicatedPolicyUpdates <- event.GenericEvent{Object: simpleObj} 143 } 144 145 err = 
r.cleanUpOrphanedRplPolicies(instance, originalCPCS, decisions) 146 if err != nil { 147 log.Error(err, "Failed to delete orphaned replicated policies") 148 149 return err 150 } 151 152 return nil 153 } 154 155 // a helper to quickly check if there are any templates in any of the policy templates 156 func policyHasTemplates(instance *policiesv1.Policy) bool { 157 for _, policyT := range instance.Spec.PolicyTemplates { 158 if templates.HasTemplate(policyT.ObjectDefinition.Raw, TemplateStartDelim, false) { 159 return true 160 } 161 } 162 163 return false 164 } 165 166 type templateCtx struct { 167 ManagedClusterName string 168 ManagedClusterLabels map[string]string 169 } 170 171 func addManagedClusterLabels(clusterName string) func(templates.CachingQueryAPI, interface{}) (interface{}, error) { 172 return func(api templates.CachingQueryAPI, ctx interface{}) (interface{}, error) { 173 typedCtx, ok := ctx.(templateCtx) 174 if !ok { 175 return ctx, nil 176 } 177 178 managedClusterGVK := schema.GroupVersionKind{ 179 Group: "cluster.open-cluster-management.io", 180 Version: "v1", 181 Kind: "ManagedCluster", 182 } 183 184 managedCluster, err := api.Get(managedClusterGVK, "", clusterName) 185 if err != nil { 186 return ctx, err 187 } 188 189 typedCtx.ManagedClusterLabels = managedCluster.GetLabels() 190 191 return typedCtx, nil 192 } 193 } 194 195 // Iterates through policy definitions and processes hub templates. A special annotation 196 // policy.open-cluster-management.io/trigger-update is used to trigger reprocessing of the templates 197 // and ensure that replicated-policies in the cluster are updated only if there is a change. This 198 // annotation is deleted from the replicated policies and not propagated to the cluster namespaces. 
199 func (r *ReplicatedPolicyReconciler) processTemplates( 200 ctx context.Context, 201 replicatedPlc *policiesv1.Policy, clusterName string, rootPlc *policiesv1.Policy, 202 ) error { 203 log := log.WithValues( 204 "policyName", rootPlc.GetName(), 205 "policyNamespace", rootPlc.GetNamespace(), 206 "cluster", clusterName, 207 ) 208 log.V(1).Info("Processing templates") 209 210 annotations := replicatedPlc.GetAnnotations() 211 212 // handle possible nil map 213 if len(annotations) == 0 { 214 annotations = make(map[string]string) 215 } 216 217 // if disable-templates annotations exists and is true, then exit without processing templates 218 if disable, ok := annotations["policy.open-cluster-management.io/disable-templates"]; ok { 219 if boolDisable, err := strconv.ParseBool(disable); err == nil && boolDisable { 220 log.Info("Detected the disable-templates annotation. Will not process templates.") 221 222 return nil 223 } 224 } 225 226 // clear the trigger-update annotation, it's only for the root policy shouldn't be in replicated 227 // policies as it will cause an unnecessary update to the managed clusters 228 if _, ok := annotations[TriggerUpdateAnnotation]; ok { 229 delete(annotations, TriggerUpdateAnnotation) 230 replicatedPlc.SetAnnotations(annotations) 231 } 232 233 plcGVK := replicatedPlc.GroupVersionKind() 234 235 templateResolverOptions := templates.ResolveOptions{ 236 ClusterScopedAllowList: []templates.ClusterScopedObjectIdentifier{ 237 { 238 Group: "cluster.open-cluster-management.io", 239 Kind: "ManagedCluster", 240 Name: clusterName, 241 }, 242 }, 243 DisableAutoCacheCleanUp: true, 244 LookupNamespace: rootPlc.GetNamespace(), 245 Watcher: &k8sdepwatches.ObjectIdentifier{ 246 Group: plcGVK.Group, 247 Version: plcGVK.Version, 248 Kind: plcGVK.Kind, 249 Namespace: replicatedPlc.GetNamespace(), 250 Name: replicatedPlc.GetName(), 251 }, 252 } 253 254 var templateResult templates.TemplateResult 255 var cacheCleanUp templates.CacheCleanUpFunc 256 257 defer 
func() { 258 if cacheCleanUp != nil { 259 err := cacheCleanUp() 260 if err != nil { 261 log.Error(err, "Failed to perform the cache clean up after template resolution") 262 } 263 } 264 }() 265 266 // A policy can have multiple policy templates within it, iterate and process each 267 for _, policyT := range replicatedPlc.Spec.PolicyTemplates { 268 if !templates.HasTemplate(policyT.ObjectDefinition.Raw, TemplateStartDelim, false) { 269 continue 270 } 271 272 if !isConfigurationPolicy(policyT) { 273 // has Templates but not a configuration policy 274 err := k8serrors.NewBadRequest("Templates are restricted to only Configuration Policies") 275 log.Error(err, "Not a Configuration Policy") 276 277 r.Recorder.Event(rootPlc, "Warning", "PolicyPropagation", 278 fmt.Sprintf( 279 "Policy %s/%s has templates but it is not a ConfigurationPolicy.", 280 rootPlc.GetName(), 281 rootPlc.GetNamespace(), 282 ), 283 ) 284 285 return err 286 } 287 288 log.V(1).Info("Found an object definition with templates") 289 290 templateContext := templateCtx{ManagedClusterName: clusterName} 291 292 if strings.Contains(string(policyT.ObjectDefinition.Raw), "ManagedClusterLabels") { 293 templateResolverOptions.ContextTransformers = append( 294 templateResolverOptions.ContextTransformers, addManagedClusterLabels(clusterName), 295 ) 296 } 297 298 // Handle value encryption initialization 299 usesEncryption := templates.UsesEncryption(policyT.ObjectDefinition.Raw, TemplateStartDelim, TemplateStopDelim) 300 // Initialize AES Key and initialization vector 301 if usesEncryption && !templateResolverOptions.EncryptionEnabled { 302 log.V(1).Info("Found an object definition requiring encryption. 
Handling encryption keys.") 303 // Get/generate the encryption key 304 encryptionKey, err := r.getEncryptionKey(ctx, clusterName) 305 if err != nil { 306 log.Error(err, "Failed to get/generate the policy encryption key") 307 308 return fmt.Errorf("%w%w", ErrRetryable, err) 309 } 310 311 // Get/generate the initialization vector 312 initializationVector, err := r.getInitializationVector( 313 rootPlc.GetName(), clusterName, annotations, 314 ) 315 if err != nil { 316 log.Error(err, "Failed to get initialization vector") 317 318 return err 319 } 320 321 // Set the initialization vector in the annotations 322 replicatedPlc.SetAnnotations(annotations) 323 324 // Set the EncryptionConfig with the retrieved key 325 templateResolverOptions.EncryptionConfig = templates.EncryptionConfig{ 326 EncryptionEnabled: true, 327 AESKey: encryptionKey, 328 InitializationVector: initializationVector, 329 } 330 } 331 332 var tplErr error 333 334 templateResult, tplErr = r.TemplateResolver.ResolveTemplate( 335 policyT.ObjectDefinition.Raw, templateContext, &templateResolverOptions, 336 ) 337 338 if templateResult.CacheCleanUp != nil { 339 cacheCleanUp = templateResult.CacheCleanUp 340 } 341 342 if tplErr != nil { 343 log.Error(tplErr, "Failed to resolve templates") 344 345 r.Recorder.Event( 346 rootPlc, 347 "Warning", 348 "PolicyPropagation", 349 fmt.Sprintf( 350 "Failed to resolve templates for cluster %s: %s", 351 clusterName, 352 tplErr.Error(), 353 ), 354 ) 355 // Set an annotation on the policyTemplate(e.g. 
ConfigurationPolicy) to the template processing error msg 356 // managed clusters will use this when creating a violation 357 policyTObjectUnstructured := &unstructured.Unstructured{} 358 359 jsonErr := json.Unmarshal(policyT.ObjectDefinition.Raw, policyTObjectUnstructured) 360 if jsonErr != nil { 361 // it shouldn't get here but if it did just log a msg 362 // it's all right, a generic msg will be used on the managedcluster 363 log.Error(jsonErr, "Error unmarshalling the object definition to JSON") 364 } else { 365 policyTAnnotations := policyTObjectUnstructured.GetAnnotations() 366 if policyTAnnotations == nil { 367 policyTAnnotations = make(map[string]string) 368 } 369 policyTAnnotations["policy.open-cluster-management.io/hub-templates-error"] = tplErr.Error() 370 policyTObjectUnstructured.SetAnnotations(policyTAnnotations) 371 372 updatedPolicyT, jsonErr := json.Marshal(policyTObjectUnstructured) 373 if jsonErr != nil { 374 log.Error(jsonErr, "Failed to marshall the policy template to JSON") 375 } else { 376 policyT.ObjectDefinition.Raw = updatedPolicyT 377 } 378 } 379 380 // If the failure was due to a Kubernetes API error that could be recoverable, let's retry it. 381 // Missing objects are handled by the templating library sending reconcile requests when they get created. 
382 if errors.Is(tplErr, templates.ErrMissingAPIResource) || 383 k8serrors.IsInternalError(tplErr) || 384 k8serrors.IsServiceUnavailable(tplErr) || 385 k8serrors.IsTimeout(tplErr) || 386 k8serrors.IsTooManyRequests(tplErr) { 387 tplErr = fmt.Errorf("%w%w", ErrRetryable, tplErr) 388 } 389 390 return tplErr 391 } 392 393 policyT.ObjectDefinition.Raw = templateResult.ResolvedJSON 394 395 // Set initialization vector annotation on the ObjectDefinition for the controller's use 396 if usesEncryption { 397 policyTObjectUnstructured := &unstructured.Unstructured{} 398 399 jsonErr := json.Unmarshal(templateResult.ResolvedJSON, policyTObjectUnstructured) 400 if jsonErr != nil { 401 return fmt.Errorf("failed to unmarshal the object definition to JSON: %w", jsonErr) 402 } 403 404 policyTAnnotations := policyTObjectUnstructured.GetAnnotations() 405 if policyTAnnotations == nil { 406 policyTAnnotations = make(map[string]string) 407 } 408 409 policyIV := annotations[IVAnnotation] 410 foundIV := policyTAnnotations[IVAnnotation] 411 412 if policyIV != foundIV { 413 policyTAnnotations[IVAnnotation] = policyIV 414 policyTObjectUnstructured.SetAnnotations(policyTAnnotations) 415 416 updatedPolicyT, jsonErr := json.Marshal(policyTObjectUnstructured) 417 if jsonErr != nil { 418 return fmt.Errorf("failed to marshal the policy template to JSON: %w", jsonErr) 419 } 420 421 policyT.ObjectDefinition.Raw = updatedPolicyT 422 } 423 } 424 } 425 426 log.V(1).Info("Successfully processed templates") 427 428 return nil 429 } 430 431 func isConfigurationPolicy(policyT *policiesv1.PolicyTemplate) bool { 432 // check if it is a configuration policy first 433 var jsonDef map[string]interface{} 434 _ = json.Unmarshal(policyT.ObjectDefinition.Raw, &jsonDef) 435 436 return jsonDef != nil && jsonDef["kind"] == "ConfigurationPolicy" 437 }