github.com/operator-framework/operator-lifecycle-manager@v0.30.0/pkg/lib/operatorstatus/status.go (about)

     1  package operatorstatus
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"os"
     7  	"reflect"
     8  	"time"
     9  
    10  	configv1 "github.com/openshift/api/config/v1"
    11  	configv1client "github.com/openshift/client-go/config/clientset/versioned/typed/config/v1"
    12  	"github.com/operator-framework/operator-lifecycle-manager/pkg/api/client/clientset/versioned"
    13  	"github.com/sirupsen/logrus"
    14  	corev1 "k8s.io/api/core/v1"
    15  	apierrors "k8s.io/apimachinery/pkg/api/errors"
    16  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    17  	"k8s.io/apimachinery/pkg/runtime/schema"
    18  	"k8s.io/apimachinery/pkg/util/diff"
    19  	"k8s.io/apimachinery/pkg/util/wait"
    20  	"k8s.io/client-go/discovery"
    21  
    22  	olmv1alpha1 "github.com/operator-framework/api/pkg/operators/v1alpha1"
    23  	"github.com/operator-framework/operator-lifecycle-manager/pkg/lib/operatorclient"
    24  	olmversion "github.com/operator-framework/operator-lifecycle-manager/pkg/version"
    25  )
    26  
    27  const (
	// Names of the ClusterOperator resources that relatedObjects knows how to
	// collect references for (it switches on these values).
    28  	clusterOperatorOLM            = "operator-lifecycle-manager"
    29  	clusterOperatorCatalogSource  = "operator-lifecycle-manager-catalog"
	// openshiftNamespace is the single namespace relatedObjects lists resources
	// from; it is also reported as a related "namespaces" object.
    30  	openshiftNamespace            = "openshift-operator-lifecycle-manager"
	// Resource names placed in the Resource field of the ObjectReferences
	// built by relatedObjects.
    31  	clusterServiceVersionResource = "clusterserviceversions"
    32  	subscriptionResource          = "subscriptions"
    33  	installPlanResource           = "installplans"
    34  )
    35  
    36  func MonitorClusterStatus(name string, syncCh <-chan error, stopCh <-chan struct{}, opClient operatorclient.ClientInterface,
    37  	configClient configv1client.ConfigV1Interface, crClient versioned.Interface, log *logrus.Logger) {
    38  	var (
    39  		syncs              int
    40  		successfulSyncs    int
    41  		hasClusterOperator bool
    42  	)
    43  	go wait.Until(func() {
    44  		// slow poll until we see a cluster operator API, which could be never
    45  		if !hasClusterOperator {
    46  			opStatusGV := schema.GroupVersion{
    47  				Group:   "config.openshift.io",
    48  				Version: "v1",
    49  			}
    50  			err := discovery.ServerSupportsVersion(opClient.KubernetesInterface().Discovery(), opStatusGV)
    51  			if err != nil {
    52  				log.Infof("ClusterOperator api not present, skipping update (%v)", err)
    53  				time.Sleep(time.Minute)
    54  				return
    55  			}
    56  			hasClusterOperator = true
    57  		}
    58  
    59  		// Sample the sync channel and see whether we're successfully retiring syncs as a
    60  		// proxy for "working" (we can't know when we hit level, but we can at least verify
    61  		// we are seeing some syncs succeeding). Once we observe at least one successful
    62  		// sync we can begin reporting available and level.
    63  		select {
    64  		case err, ok := <-syncCh:
    65  			if !ok {
    66  				// syncCh should only close if the Run() loop exits
    67  				time.Sleep(5 * time.Second)
    68  				log.Fatalf("Status sync channel closed but process did not exit in time")
    69  			}
    70  			syncs++
    71  			if err == nil {
    72  				successfulSyncs++
    73  			}
    74  			// grab any other sync events that have accumulated
    75  			for len(syncCh) > 0 {
    76  				if err := <-syncCh; err == nil {
    77  					successfulSyncs++
    78  				}
    79  				syncs++
    80  			}
    81  			// if we haven't yet accumulated enough syncs, wait longer
    82  			// TODO: replace these magic numbers with a better measure of syncs across all queueInformers
    83  			if successfulSyncs < 5 || syncs < 10 {
    84  				log.Printf("Waiting to observe more successful syncs")
    85  				return
    86  			}
    87  		}
    88  
    89  		// create the cluster operator in an initial state if it does not exist
    90  		existing, err := configClient.ClusterOperators().Get(context.TODO(), name, metav1.GetOptions{})
    91  		if apierrors.IsNotFound(err) {
    92  			log.Info("Existing operator status not found, creating")
    93  			created, createErr := configClient.ClusterOperators().Create(context.TODO(), &configv1.ClusterOperator{
    94  				ObjectMeta: metav1.ObjectMeta{
    95  					Name: name,
    96  				},
    97  				Status: configv1.ClusterOperatorStatus{
    98  					Conditions: []configv1.ClusterOperatorStatusCondition{
    99  						{
   100  							Type:               configv1.OperatorProgressing,
   101  							Status:             configv1.ConditionTrue,
   102  							Message:            fmt.Sprintf("Installing %s", olmversion.OLMVersion),
   103  							LastTransitionTime: metav1.Now(),
   104  						},
   105  						{
   106  							Type:               configv1.OperatorDegraded,
   107  							Status:             configv1.ConditionFalse,
   108  							LastTransitionTime: metav1.Now(),
   109  						},
   110  						{
   111  							Type:               configv1.OperatorAvailable,
   112  							Status:             configv1.ConditionFalse,
   113  							LastTransitionTime: metav1.Now(),
   114  						},
   115  						{
   116  							Type:               configv1.OperatorUpgradeable,
   117  							Status:             configv1.ConditionFalse,
   118  							LastTransitionTime: metav1.Now(),
   119  						},
   120  					},
   121  				},
   122  			}, metav1.CreateOptions{})
   123  			if createErr != nil {
   124  				log.Errorf("Failed to create cluster operator: %v\n", createErr)
   125  				return
   126  			}
   127  			created.Status.RelatedObjects, err = relatedObjects(name, opClient, crClient, log)
   128  			if err != nil {
   129  				log.Errorf("Failed to get related objects: %v", err)
   130  			}
   131  			existing = created
   132  			err = nil
   133  		}
   134  		if err != nil {
   135  			log.Errorf("Unable to retrieve cluster operator: %v", err)
   136  			return
   137  		}
   138  
   139  		// update the status with the appropriate state
   140  		previousStatus := existing.Status.DeepCopy()
   141  		previousOperatorVersion := func(vs []configv1.OperandVersion) string {
   142  			for _, v := range vs {
   143  				if v.Name == "operator" {
   144  					return v.Version
   145  				}
   146  			}
   147  			return ""
   148  		}(previousStatus.Versions)
   149  		targetOperatorVersion := os.Getenv("RELEASE_VERSION")
   150  		switch {
   151  		case successfulSyncs > 0:
   152  			setOperatorStatusCondition(&existing.Status.Conditions, configv1.ClusterOperatorStatusCondition{
   153  				Type:   configv1.OperatorDegraded,
   154  				Status: configv1.ConditionFalse,
   155  			})
   156  			setOperatorStatusCondition(&existing.Status.Conditions, configv1.ClusterOperatorStatusCondition{
   157  				Type:    configv1.OperatorProgressing,
   158  				Status:  configv1.ConditionFalse,
   159  				Message: fmt.Sprintf("Deployed %s", olmversion.OLMVersion),
   160  			})
   161  			setOperatorStatusCondition(&existing.Status.Conditions, configv1.ClusterOperatorStatusCondition{
   162  				Type:   configv1.OperatorAvailable,
   163  				Status: configv1.ConditionTrue,
   164  			})
   165  			setOperatorStatusCondition(&existing.Status.Conditions, configv1.ClusterOperatorStatusCondition{
   166  				Type:   configv1.OperatorUpgradeable,
   167  				Status: configv1.ConditionTrue,
   168  			})
   169  			// we set the versions array when all the latest code is deployed and running - in this case,
   170  			// the sync method is responsible for guaranteeing that happens before it returns nil
   171  			if len(targetOperatorVersion) > 0 {
   172  				existing.Status.Versions = []configv1.OperandVersion{
   173  					{
   174  						Name:    "operator",
   175  						Version: targetOperatorVersion,
   176  					},
   177  					{
   178  						Name:    "operator-lifecycle-manager",
   179  						Version: olmversion.OLMVersion,
   180  					},
   181  				}
   182  				if targetOperatorVersion != previousOperatorVersion {
   183  					setOperatorStatusCondition(&existing.Status.Conditions, configv1.ClusterOperatorStatusCondition{
   184  						Type:    configv1.OperatorProgressing,
   185  						Status:  configv1.ConditionTrue,
   186  						Message: fmt.Sprintf("Deployed %s", olmversion.OLMVersion),
   187  					})
   188  				}
   189  			} else {
   190  				existing.Status.Versions = nil
   191  			}
   192  		default:
   193  			setOperatorStatusCondition(&existing.Status.Conditions, configv1.ClusterOperatorStatusCondition{
   194  				Type:    configv1.OperatorDegraded,
   195  				Status:  configv1.ConditionTrue,
   196  				Message: "Waiting for updates to take effect",
   197  			})
   198  			setOperatorStatusCondition(&existing.Status.Conditions, configv1.ClusterOperatorStatusCondition{
   199  				Type:    configv1.OperatorProgressing,
   200  				Status:  configv1.ConditionFalse,
   201  				Message: fmt.Sprintf("Waiting to see update %s succeed", olmversion.OLMVersion),
   202  			})
   203  			setOperatorStatusCondition(&existing.Status.Conditions, configv1.ClusterOperatorStatusCondition{
   204  				Type:    configv1.OperatorUpgradeable,
   205  				Status:  configv1.ConditionFalse,
   206  				Message: "Waiting for updates to take effect",
   207  			})
   208  			// TODO: use % errors within a window to report available
   209  		}
   210  
   211  		// always update the related objects in case changes have occurred
   212  		existing.Status.RelatedObjects, err = relatedObjects(name, opClient, crClient, log)
   213  		if err != nil {
   214  			log.Errorf("Failed to get related objects: %v", err)
   215  		}
   216  		if !reflect.DeepEqual(previousStatus.RelatedObjects, existing.Status.RelatedObjects) {
   217  			diffString := diff.ObjectDiff(previousStatus.RelatedObjects, existing.Status.RelatedObjects)
   218  			log.Debugf("Update required for related objects: %v", diffString)
   219  		}
   220  
   221  		// update the status
   222  		if !reflect.DeepEqual(previousStatus, &existing.Status) {
   223  			if _, err := configClient.ClusterOperators().UpdateStatus(context.TODO(), existing, metav1.UpdateOptions{}); err != nil {
   224  				log.Errorf("Unable to update cluster operator status: %v", err)
   225  			}
   226  		}
   227  
   228  		// if we've reported success, we can sleep longer, otherwise we want to keep watching for
   229  		// successful
   230  		if successfulSyncs > 0 {
   231  			time.Sleep(25 * time.Second)
   232  		}
   233  
   234  	}, 5*time.Second, stopCh)
   235  }
   236  
   237  func setOperatorStatusCondition(conditions *[]configv1.ClusterOperatorStatusCondition, newCondition configv1.ClusterOperatorStatusCondition) {
   238  	if conditions == nil {
   239  		conditions = &[]configv1.ClusterOperatorStatusCondition{}
   240  	}
   241  	existingCondition := findOperatorStatusCondition(*conditions, newCondition.Type)
   242  	if existingCondition == nil {
   243  		newCondition.LastTransitionTime = metav1.NewTime(time.Now())
   244  		*conditions = append(*conditions, newCondition)
   245  		return
   246  	}
   247  
   248  	if existingCondition.Status != newCondition.Status {
   249  		existingCondition.Status = newCondition.Status
   250  		existingCondition.LastTransitionTime = metav1.NewTime(time.Now())
   251  	}
   252  
   253  	existingCondition.Reason = newCondition.Reason
   254  	existingCondition.Message = newCondition.Message
   255  }
   256  
   257  func findOperatorStatusCondition(conditions []configv1.ClusterOperatorStatusCondition, conditionType configv1.ClusterStatusConditionType) *configv1.ClusterOperatorStatusCondition {
   258  	for i := range conditions {
   259  		if conditions[i].Type == conditionType {
   260  			return &conditions[i]
   261  		}
   262  	}
   263  
   264  	return nil
   265  }
   266  
   267  // relatedObjects returns RelatedObjects in the ClusterOperator.Status.
   268  // RelatedObjects are consumed by https://github.com/openshift/must-gather
   269  func relatedObjects(name string, opClient operatorclient.ClientInterface, crClient versioned.Interface, log *logrus.Logger) ([]configv1.ObjectReference, error) {
   270  	var objectReferences []configv1.ObjectReference
   271  	log.Debugf("Adding related objects for %v", name)
   272  	namespace := openshiftNamespace // hard-coded to constant
   273  
   274  	switch name {
   275  	case clusterOperatorOLM:
   276  		csvList, err := crClient.OperatorsV1alpha1().ClusterServiceVersions(namespace).List(context.TODO(), metav1.ListOptions{})
   277  		if err != nil {
   278  			return nil, err
   279  		}
   280  
   281  		for _, csv := range csvList.Items {
   282  			if csv.IsCopied() {
   283  				continue
   284  			}
   285  			objectReferences = append(objectReferences, configv1.ObjectReference{
   286  				Group:     olmv1alpha1.GroupName,
   287  				Resource:  clusterServiceVersionResource,
   288  				Namespace: csv.GetNamespace(),
   289  				Name:      csv.GetName(),
   290  			})
   291  		}
   292  	case clusterOperatorCatalogSource:
   293  		subList, err := crClient.OperatorsV1alpha1().Subscriptions(namespace).List(context.TODO(), metav1.ListOptions{})
   294  		if err != nil {
   295  			return nil, err
   296  		}
   297  
   298  		installPlanList, err := crClient.OperatorsV1alpha1().InstallPlans(namespace).List(context.TODO(), metav1.ListOptions{})
   299  		if err != nil {
   300  			return nil, err
   301  		}
   302  
   303  		for _, sub := range subList.Items {
   304  			objectReferences = append(objectReferences, configv1.ObjectReference{
   305  				Group:     olmv1alpha1.GroupName,
   306  				Resource:  subscriptionResource,
   307  				Namespace: sub.GetNamespace(),
   308  				Name:      sub.GetName(),
   309  			})
   310  		}
   311  		for _, ip := range installPlanList.Items {
   312  			objectReferences = append(objectReferences, configv1.ObjectReference{
   313  				Group:     olmv1alpha1.GroupName,
   314  				Resource:  installPlanResource,
   315  				Namespace: ip.GetNamespace(),
   316  				Name:      ip.GetName(),
   317  			})
   318  		}
   319  	}
   320  	namespaces := configv1.ObjectReference{
   321  		Group:    corev1.GroupName,
   322  		Resource: "namespaces",
   323  		Name:     namespace,
   324  	}
   325  	objectReferences = append(objectReferences, namespaces)
   326  	return objectReferences, nil
   327  }