github.com/operator-framework/operator-lifecycle-manager@v0.30.0/pkg/lib/operatorstatus/status.go (about) 1 package operatorstatus 2 3 import ( 4 "context" 5 "fmt" 6 "os" 7 "reflect" 8 "time" 9 10 configv1 "github.com/openshift/api/config/v1" 11 configv1client "github.com/openshift/client-go/config/clientset/versioned/typed/config/v1" 12 "github.com/operator-framework/operator-lifecycle-manager/pkg/api/client/clientset/versioned" 13 "github.com/sirupsen/logrus" 14 corev1 "k8s.io/api/core/v1" 15 apierrors "k8s.io/apimachinery/pkg/api/errors" 16 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 17 "k8s.io/apimachinery/pkg/runtime/schema" 18 "k8s.io/apimachinery/pkg/util/diff" 19 "k8s.io/apimachinery/pkg/util/wait" 20 "k8s.io/client-go/discovery" 21 22 olmv1alpha1 "github.com/operator-framework/api/pkg/operators/v1alpha1" 23 "github.com/operator-framework/operator-lifecycle-manager/pkg/lib/operatorclient" 24 olmversion "github.com/operator-framework/operator-lifecycle-manager/pkg/version" 25 ) 26 27 const ( 28 clusterOperatorOLM = "operator-lifecycle-manager" 29 clusterOperatorCatalogSource = "operator-lifecycle-manager-catalog" 30 openshiftNamespace = "openshift-operator-lifecycle-manager" 31 clusterServiceVersionResource = "clusterserviceversions" 32 subscriptionResource = "subscriptions" 33 installPlanResource = "installplans" 34 ) 35 36 func MonitorClusterStatus(name string, syncCh <-chan error, stopCh <-chan struct{}, opClient operatorclient.ClientInterface, 37 configClient configv1client.ConfigV1Interface, crClient versioned.Interface, log *logrus.Logger) { 38 var ( 39 syncs int 40 successfulSyncs int 41 hasClusterOperator bool 42 ) 43 go wait.Until(func() { 44 // slow poll until we see a cluster operator API, which could be never 45 if !hasClusterOperator { 46 opStatusGV := schema.GroupVersion{ 47 Group: "config.openshift.io", 48 Version: "v1", 49 } 50 err := discovery.ServerSupportsVersion(opClient.KubernetesInterface().Discovery(), opStatusGV) 51 if err != nil { 52 log.Infof("ClusterOperator api not present, skipping update (%v)", err) 53 time.Sleep(time.Minute) 54 return 55 } 56 hasClusterOperator = true 57 } 58 59 // Sample the sync channel and see whether we're successfully retiring syncs as a 60 // proxy for "working" (we can't know when we hit level, but we can at least verify 61 // we are seeing some syncs succeeding). Once we observe at least one successful 62 // sync we can begin reporting available and level. 63 select { 64 case err, ok := <-syncCh: 65 if !ok { 66 // syncCh should only close if the Run() loop exits 67 time.Sleep(5 * time.Second) 68 log.Fatalf("Status sync channel closed but process did not exit in time") 69 } 70 syncs++ 71 if err == nil { 72 successfulSyncs++ 73 } 74 // grab any other sync events that have accumulated 75 for len(syncCh) > 0 { 76 if err := <-syncCh; err == nil { 77 successfulSyncs++ 78 } 79 syncs++ 80 } 81 // if we haven't yet accumulated enough syncs, wait longer 82 // TODO: replace these magic numbers with a better measure of syncs across all queueInformers 83 if successfulSyncs < 5 || syncs < 10 { 84 log.Printf("Waiting to observe more successful syncs") 85 return 86 } 87 } 88 89 // create the cluster operator in an initial state if it does not exist 90 existing, err := configClient.ClusterOperators().Get(context.TODO(), name, metav1.GetOptions{}) 91 if apierrors.IsNotFound(err) { 92 log.Info("Existing operator status not found, creating") 93 created, createErr := configClient.ClusterOperators().Create(context.TODO(), &configv1.ClusterOperator{ 94 ObjectMeta: metav1.ObjectMeta{ 95 Name: name, 96 }, 97 Status: configv1.ClusterOperatorStatus{ 98 Conditions: []configv1.ClusterOperatorStatusCondition{ 99 { 100 Type: configv1.OperatorProgressing, 101 Status: configv1.ConditionTrue, 102 Message: fmt.Sprintf("Installing %s", olmversion.OLMVersion), 103 LastTransitionTime: metav1.Now(), 104 }, 105 { 106 Type: configv1.OperatorDegraded, 107 Status: configv1.ConditionFalse, 108 LastTransitionTime: metav1.Now(), 109 }, 110 { 111 Type: configv1.OperatorAvailable, 112 Status: configv1.ConditionFalse, 113 LastTransitionTime: metav1.Now(), 114 }, 115 { 116 Type: configv1.OperatorUpgradeable, 117 Status: configv1.ConditionFalse, 118 LastTransitionTime: metav1.Now(), 119 }, 120 }, 121 }, 122 }, metav1.CreateOptions{}) 123 if createErr != nil { 124 log.Errorf("Failed to create cluster operator: %v\n", createErr) 125 return 126 } 127 created.Status.RelatedObjects, err = relatedObjects(name, opClient, crClient, log) 128 if err != nil { 129 log.Errorf("Failed to get related objects: %v", err) 130 } 131 existing = created 132 err = nil 133 } 134 if err != nil { 135 log.Errorf("Unable to retrieve cluster operator: %v", err) 136 return 137 } 138 139 // update the status with the appropriate state 140 previousStatus := existing.Status.DeepCopy() 141 previousOperatorVersion := func(vs []configv1.OperandVersion) string { 142 for _, v := range vs { 143 if v.Name == "operator" { 144 return v.Version 145 } 146 } 147 return "" 148 }(previousStatus.Versions) 149 targetOperatorVersion := os.Getenv("RELEASE_VERSION") 150 switch { 151 case successfulSyncs > 0: 152 setOperatorStatusCondition(&existing.Status.Conditions, configv1.ClusterOperatorStatusCondition{ 153 Type: configv1.OperatorDegraded, 154 Status: configv1.ConditionFalse, 155 }) 156 setOperatorStatusCondition(&existing.Status.Conditions, configv1.ClusterOperatorStatusCondition{ 157 Type: configv1.OperatorProgressing, 158 Status: configv1.ConditionFalse, 159 Message: fmt.Sprintf("Deployed %s", olmversion.OLMVersion), 160 }) 161 setOperatorStatusCondition(&existing.Status.Conditions, configv1.ClusterOperatorStatusCondition{ 162 Type: configv1.OperatorAvailable, 163 Status: configv1.ConditionTrue, 164 }) 165 setOperatorStatusCondition(&existing.Status.Conditions, configv1.ClusterOperatorStatusCondition{ 166 Type: configv1.OperatorUpgradeable, 167 Status: configv1.ConditionTrue, 168 }) 169 // we set the versions array when all the latest code is deployed and running - in this case, 170 // the sync method is responsible for guaranteeing that happens before it returns nil 171 if len(targetOperatorVersion) > 0 { 172 existing.Status.Versions = []configv1.OperandVersion{ 173 { 174 Name: "operator", 175 Version: targetOperatorVersion, 176 }, 177 { 178 Name: "operator-lifecycle-manager", 179 Version: olmversion.OLMVersion, 180 }, 181 } 182 if targetOperatorVersion != previousOperatorVersion { 183 setOperatorStatusCondition(&existing.Status.Conditions, configv1.ClusterOperatorStatusCondition{ 184 Type: configv1.OperatorProgressing, 185 Status: configv1.ConditionTrue, 186 Message: fmt.Sprintf("Deployed %s", olmversion.OLMVersion), 187 }) 188 } 189 } else { 190 existing.Status.Versions = nil 191 } 192 default: 193 setOperatorStatusCondition(&existing.Status.Conditions, configv1.ClusterOperatorStatusCondition{ 194 Type: configv1.OperatorDegraded, 195 Status: configv1.ConditionTrue, 196 Message: "Waiting for updates to take effect", 197 }) 198 setOperatorStatusCondition(&existing.Status.Conditions, configv1.ClusterOperatorStatusCondition{ 199 Type: configv1.OperatorProgressing, 200 Status: configv1.ConditionFalse, 201 Message: fmt.Sprintf("Waiting to see update %s succeed", olmversion.OLMVersion), 202 }) 203 setOperatorStatusCondition(&existing.Status.Conditions, configv1.ClusterOperatorStatusCondition{ 204 Type: configv1.OperatorUpgradeable, 205 Status: configv1.ConditionFalse, 206 Message: "Waiting for updates to take effect", 207 }) 208 // TODO: use % errors within a window to report available 209 } 210 211 // always update the related objects in case changes have occurred 212 existing.Status.RelatedObjects, err = relatedObjects(name, opClient, crClient, log) 213 if err != nil { 214 log.Errorf("Failed to get related objects: %v", err) 215 } 216 if !reflect.DeepEqual(previousStatus.RelatedObjects, existing.Status.RelatedObjects) { 217 diffString := diff.ObjectDiff(previousStatus.RelatedObjects, existing.Status.RelatedObjects) 218 log.Debugf("Update required for related objects: %v", diffString) 219 } 220 221 // update the status 222 if !reflect.DeepEqual(previousStatus, &existing.Status) { 223 if _, err := configClient.ClusterOperators().UpdateStatus(context.TODO(), existing, metav1.UpdateOptions{}); err != nil { 224 log.Errorf("Unable to update cluster operator status: %v", err) 225 } 226 } 227 228 // if we've reported success, we can sleep longer, otherwise we want to keep watching for 229 // successful 230 if successfulSyncs > 0 { 231 time.Sleep(25 * time.Second) 232 } 233 234 }, 5*time.Second, stopCh) 235 } 236 237 func setOperatorStatusCondition(conditions *[]configv1.ClusterOperatorStatusCondition, newCondition configv1.ClusterOperatorStatusCondition) { 238 if conditions == nil { 239 conditions = &[]configv1.ClusterOperatorStatusCondition{} 240 } 241 existingCondition := findOperatorStatusCondition(*conditions, newCondition.Type) 242 if existingCondition == nil { 243 newCondition.LastTransitionTime = metav1.NewTime(time.Now()) 244 *conditions = append(*conditions, newCondition) 245 return 246 } 247 248 if existingCondition.Status != newCondition.Status { 249 existingCondition.Status = newCondition.Status 250 existingCondition.LastTransitionTime = metav1.NewTime(time.Now()) 251 } 252 253 existingCondition.Reason = newCondition.Reason 254 existingCondition.Message = newCondition.Message 255 } 256 257 func findOperatorStatusCondition(conditions []configv1.ClusterOperatorStatusCondition, conditionType configv1.ClusterStatusConditionType) *configv1.ClusterOperatorStatusCondition { 258 for i := range conditions { 259 if conditions[i].Type == conditionType { 260 return &conditions[i] 261 } 262 } 263 264 return nil 265 } 266 267 // relatedObjects returns RelatedObjects in the ClusterOperator.Status. 268 // RelatedObjects are consumed by https://github.com/openshift/must-gather 269 func relatedObjects(name string, opClient operatorclient.ClientInterface, crClient versioned.Interface, log *logrus.Logger) ([]configv1.ObjectReference, error) { 270 var objectReferences []configv1.ObjectReference 271 log.Debugf("Adding related objects for %v", name) 272 namespace := openshiftNamespace // hard-coded to constant 273 274 switch name { 275 case clusterOperatorOLM: 276 csvList, err := crClient.OperatorsV1alpha1().ClusterServiceVersions(namespace).List(context.TODO(), metav1.ListOptions{}) 277 if err != nil { 278 return nil, err 279 } 280 281 for _, csv := range csvList.Items { 282 if csv.IsCopied() { 283 continue 284 } 285 objectReferences = append(objectReferences, configv1.ObjectReference{ 286 Group: olmv1alpha1.GroupName, 287 Resource: clusterServiceVersionResource, 288 Namespace: csv.GetNamespace(), 289 Name: csv.GetName(), 290 }) 291 } 292 case clusterOperatorCatalogSource: 293 subList, err := crClient.OperatorsV1alpha1().Subscriptions(namespace).List(context.TODO(), metav1.ListOptions{}) 294 if err != nil { 295 return nil, err 296 } 297 298 installPlanList, err := crClient.OperatorsV1alpha1().InstallPlans(namespace).List(context.TODO(), metav1.ListOptions{}) 299 if err != nil { 300 return nil, err 301 } 302 303 for _, sub := range subList.Items { 304 objectReferences = append(objectReferences, configv1.ObjectReference{ 305 Group: olmv1alpha1.GroupName, 306 Resource: subscriptionResource, 307 Namespace: sub.GetNamespace(), 308 Name: sub.GetName(), 309 }) 310 } 311 for _, ip := range installPlanList.Items { 312 objectReferences = append(objectReferences, configv1.ObjectReference{ 313 Group: olmv1alpha1.GroupName, 314 Resource: installPlanResource, 315 Namespace: ip.GetNamespace(), 316 Name: ip.GetName(), 317 }) 318 } 319 } 320 namespaces := configv1.ObjectReference{ 321 Group: corev1.GroupName, 322 Resource: "namespaces", 323 Name: namespace, 324 } 325 objectReferences = append(objectReferences, namespaces) 326 return objectReferences, nil 327 }