github.com/percona/percona-xtradb-cluster-operator@v1.14.0/pkg/controller/pxc/version.go (about)

     1  package pxc
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"math/rand"
     7  	"os"
     8  	"strings"
     9  	"sync/atomic"
    10  	"time"
    11  
    12  	"github.com/pkg/errors"
    13  	"github.com/robfig/cron/v3"
    14  	corev1 "k8s.io/api/core/v1"
    15  	k8serrors "k8s.io/apimachinery/pkg/api/errors"
    16  	"k8s.io/apimachinery/pkg/labels"
    17  	"k8s.io/apimachinery/pkg/types"
    18  	k8sretry "k8s.io/client-go/util/retry"
    19  	"sigs.k8s.io/controller-runtime/pkg/client"
    20  	logf "sigs.k8s.io/controller-runtime/pkg/log"
    21  
    22  	apiv1 "github.com/percona/percona-xtradb-cluster-operator/pkg/apis/pxc/v1"
    23  	"github.com/percona/percona-xtradb-cluster-operator/pkg/k8s"
    24  	"github.com/percona/percona-xtradb-cluster-operator/pkg/pxc/queries"
    25  	"github.com/percona/percona-xtradb-cluster-operator/pkg/pxc/users"
    26  	"github.com/percona/percona-xtradb-cluster-operator/version"
    27  )
    28  
// Schedule is the value stored in the cron job registry for a registered job.
// ID is the scheduler entry that deleteCronJob removes; CronSchedule is the
// cron expression the job was registered with and is compared against the
// desired expression to detect schedule changes.
type Schedule struct {
	ID           cron.EntryID
	CronSchedule string
}
    33  
// versionNotReadyErr is the sentinel returned by mysqlVersion when the PXC
// version should not (or cannot yet) be queried; fetchVersionFromPXC treats
// it as "nothing to do" rather than as a failure.
var versionNotReadyErr = errors.New("not ready to fetch version")
    35  
    36  func versionJobName(cr *apiv1.PerconaXtraDBCluster) string {
    37  	jobName := "ensure-version"
    38  	nn := types.NamespacedName{
    39  		Name:      cr.Name,
    40  		Namespace: cr.Namespace,
    41  	}
    42  	return fmt.Sprintf("%s/%s", jobName, nn.String())
    43  }
    44  
    45  func telemetryJobName(cr *apiv1.PerconaXtraDBCluster) string {
    46  	jobName := "telemetry"
    47  	nn := types.NamespacedName{
    48  		Name:      cr.Name,
    49  		Namespace: cr.Namespace,
    50  	}
    51  	return fmt.Sprintf("%s/%s", jobName, nn.String())
    52  }
    53  
    54  func (r *ReconcilePerconaXtraDBCluster) deleteCronJob(jobName string) {
    55  	job, ok := r.crons.ensureVersionJobs.LoadAndDelete(jobName)
    56  	if !ok {
    57  		return
    58  	}
    59  	r.crons.crons.Remove(job.(Schedule).ID)
    60  }
    61  
    62  func (r *ReconcilePerconaXtraDBCluster) scheduleTelemetryRequests(ctx context.Context, cr *apiv1.PerconaXtraDBCluster, vs VersionService) error {
    63  	log := logf.FromContext(ctx)
    64  
    65  	jn := telemetryJobName(cr)
    66  	scheduleRaw, ok := r.crons.ensureVersionJobs.Load(jn)
    67  	if !telemetryEnabled() {
    68  		if ok {
    69  			r.deleteCronJob(jn)
    70  		}
    71  		return nil
    72  	}
    73  
    74  	schedule := Schedule{}
    75  	if ok {
    76  		schedule = scheduleRaw.(Schedule)
    77  	}
    78  
    79  	sch, found := os.LookupEnv("TELEMETRY_SCHEDULE")
    80  	if !found {
    81  		sch = fmt.Sprintf("%d * * * *", rand.Intn(60))
    82  	}
    83  
    84  	if ok && !found {
    85  		return nil
    86  	}
    87  
    88  	if found && schedule.CronSchedule == sch {
    89  		return nil
    90  	}
    91  
    92  	if ok {
    93  		log.Info("remove job because of new", "old", schedule.CronSchedule, "new", sch)
    94  		r.deleteCronJob(jn)
    95  	}
    96  
    97  	id, err := r.crons.AddFuncWithSeconds(sch, func() {
    98  		localCr := &apiv1.PerconaXtraDBCluster{}
    99  		err := r.client.Get(ctx, types.NamespacedName{Name: cr.Name, Namespace: cr.Namespace}, localCr)
   100  		if k8serrors.IsNotFound(err) {
   101  			log.Info("cluster is not found, deleting the job",
   102  				"name", jn, "cluster", cr.Name, "namespace", cr.Namespace)
   103  			r.deleteCronJob(jn)
   104  			return
   105  		}
   106  		if err != nil {
   107  			log.Error(err, "failed to get CR")
   108  			return
   109  		}
   110  
   111  		if localCr.Status.Status != apiv1.AppStateReady {
   112  			log.Info("cluster is not ready")
   113  			return
   114  		}
   115  
   116  		err = localCr.CheckNSetDefaults(r.serverVersion, log)
   117  		if err != nil {
   118  			log.Error(err, "failed to set defaults for CR")
   119  			return
   120  		}
   121  
   122  		_, err = r.getNewVersions(ctx, localCr, vs)
   123  		if err != nil {
   124  			log.Error(err, "failed to send telemetry")
   125  		}
   126  	})
   127  	if err != nil {
   128  		return err
   129  	}
   130  
   131  	log.Info("add new job", "name", jn, "schedule", sch)
   132  
   133  	r.crons.ensureVersionJobs.Store(jn, Schedule{
   134  		ID:           id,
   135  		CronSchedule: sch,
   136  	})
   137  
   138  	// send telemetry on startup
   139  	_, err = r.getNewVersions(ctx, cr, vs)
   140  	if err != nil {
   141  		log.Error(err, "failed to send telemetry")
   142  	}
   143  
   144  	return nil
   145  }
   146  
   147  func (r *ReconcilePerconaXtraDBCluster) scheduleEnsurePXCVersion(ctx context.Context, cr *apiv1.PerconaXtraDBCluster, vs VersionService) error {
   148  	log := logf.FromContext(ctx)
   149  
   150  	jn := versionJobName(cr)
   151  	scheduleRaw, ok := r.crons.ensureVersionJobs.Load(jn)
   152  	if cr.Spec.UpgradeOptions.Schedule == "" || !(versionUpgradeEnabled(cr) || telemetryEnabled()) {
   153  		if ok {
   154  			r.deleteCronJob(jn)
   155  		}
   156  		return nil
   157  	}
   158  
   159  	schedule := Schedule{}
   160  	if ok {
   161  		schedule = scheduleRaw.(Schedule)
   162  	}
   163  
   164  	if ok && schedule.CronSchedule == cr.Spec.UpgradeOptions.Schedule {
   165  		return nil
   166  	}
   167  
   168  	if ok {
   169  		log.Info("remove job because of new", "old", schedule.CronSchedule, "new", cr.Spec.UpgradeOptions.Schedule)
   170  		r.deleteCronJob(jn)
   171  	}
   172  
   173  	nn := types.NamespacedName{
   174  		Name:      cr.Name,
   175  		Namespace: cr.Namespace,
   176  	}
   177  
   178  	l := r.lockers.LoadOrCreate(nn.String())
   179  
   180  	id, err := r.crons.AddFuncWithSeconds(cr.Spec.UpgradeOptions.Schedule, func() {
   181  		l.statusMutex.Lock()
   182  		defer l.statusMutex.Unlock()
   183  
   184  		if !atomic.CompareAndSwapInt32(l.updateSync, updateDone, updateWait) {
   185  			return
   186  		}
   187  
   188  		localCr := &apiv1.PerconaXtraDBCluster{}
   189  		err := r.client.Get(ctx, types.NamespacedName{Name: cr.Name, Namespace: cr.Namespace}, localCr)
   190  		if k8serrors.IsNotFound(err) {
   191  			log.Info("cluster is not found, deleting the job",
   192  				"name", jn, "cluster", cr.Name, "namespace", cr.Namespace)
   193  			r.deleteCronJob(jn)
   194  			return
   195  		}
   196  		if err != nil {
   197  			log.Error(err, "failed to get CR")
   198  			return
   199  		}
   200  
   201  		if localCr.Status.Status != apiv1.AppStateReady {
   202  			log.Info("cluster is not ready")
   203  			return
   204  		}
   205  
   206  		err = localCr.CheckNSetDefaults(r.serverVersion, log)
   207  		if err != nil {
   208  			log.Error(err, "failed to set defaults for CR")
   209  			return
   210  		}
   211  
   212  		err = r.ensurePXCVersion(ctx, localCr, vs)
   213  		if err != nil {
   214  			log.Error(err, "failed to ensure version")
   215  		}
   216  	})
   217  	if err != nil {
   218  		return err
   219  	}
   220  
   221  	log.Info("add new job", "name", jn, "schedule", cr.Spec.UpgradeOptions.Schedule)
   222  
   223  	r.crons.ensureVersionJobs.Store(jn, Schedule{
   224  		ID:           id,
   225  		CronSchedule: cr.Spec.UpgradeOptions.Schedule,
   226  	})
   227  
   228  	return nil
   229  }
   230  
   231  func (r *ReconcilePerconaXtraDBCluster) getNewVersions(ctx context.Context, cr *apiv1.PerconaXtraDBCluster, vs VersionService) (DepVersion, error) {
   232  	log := logf.FromContext(ctx)
   233  
   234  	watchNs, err := k8s.GetWatchNamespace()
   235  	if err != nil {
   236  		return DepVersion{}, errors.Wrap(err, "get WATCH_NAMESPACE env variable")
   237  	}
   238  
   239  	vm := versionMeta{
   240  		Apply:               cr.Spec.UpgradeOptions.Apply,
   241  		Platform:            string(cr.Spec.Platform),
   242  		KubeVersion:         r.serverVersion.Info.GitVersion,
   243  		PXCVersion:          cr.Status.PXC.Version,
   244  		PMMVersion:          cr.Status.PMM.Version,
   245  		HAProxyVersion:      cr.Status.HAProxy.Version,
   246  		ProxySQLVersion:     cr.Status.ProxySQL.Version,
   247  		BackupVersion:       cr.Status.Backup.Version,
   248  		LogCollectorVersion: cr.Status.LogCollector.Version,
   249  		CRUID:               string(cr.GetUID()),
   250  		ClusterWideEnabled:  watchNs == "",
   251  	}
   252  
   253  	endpoint := apiv1.GetDefaultVersionServiceEndpoint()
   254  	log.V(1).Info("Use version service endpoint", "endpoint", endpoint)
   255  
   256  	if telemetryEnabled() && (!versionUpgradeEnabled(cr) || cr.Spec.UpgradeOptions.VersionServiceEndpoint != apiv1.GetDefaultVersionServiceEndpoint()) {
   257  		_, err := vs.GetExactVersion(cr, endpoint, vm)
   258  		if err != nil {
   259  			log.Error(err, "failed to send telemetry to "+apiv1.GetDefaultVersionServiceEndpoint())
   260  		}
   261  		return DepVersion{}, nil
   262  	}
   263  
   264  	newVersion, err := vs.GetExactVersion(cr, cr.Spec.UpgradeOptions.VersionServiceEndpoint, vm)
   265  	if err != nil {
   266  		return DepVersion{}, errors.Wrap(err, "failed to check version")
   267  	}
   268  
   269  	return newVersion, nil
   270  }
   271  
   272  func (r *ReconcilePerconaXtraDBCluster) ensurePXCVersion(ctx context.Context, cr *apiv1.PerconaXtraDBCluster, vs VersionService) error {
   273  	log := logf.FromContext(ctx)
   274  
   275  	if cr.Status.Status != apiv1.AppStateReady && cr.Status.PXC.Version != "" {
   276  		return errors.New("cluster is not ready")
   277  	}
   278  
   279  	if !versionUpgradeEnabled(cr) {
   280  		return nil
   281  	}
   282  
   283  	newVersion, err := r.getNewVersions(ctx, cr, vs)
   284  	if err != nil {
   285  		return errors.Wrap(err, "failed to get new versions")
   286  	}
   287  
   288  	patch := client.MergeFrom(cr.DeepCopy())
   289  
   290  	if cr.Spec.PXC != nil && cr.Spec.PXC.Image != newVersion.PXCImage {
   291  		if cr.Status.PXC.Version == "" {
   292  			log.Info("set PXC version to " + newVersion.PXCVersion)
   293  		} else {
   294  			log.Info("update PXC version", "old version", cr.Status.PXC.Version, "new version", newVersion.PXCVersion)
   295  		}
   296  		cr.Spec.PXC.Image = newVersion.PXCImage
   297  	}
   298  
   299  	if cr.Spec.Backup != nil && cr.Spec.Backup.Image != newVersion.BackupImage {
   300  		if cr.Status.Backup.Version == "" {
   301  			log.Info("set Backup version to " + newVersion.BackupVersion)
   302  		} else {
   303  			log.Info("update Backup version", "old version", cr.Status.Backup.Version, "new version", newVersion.BackupVersion)
   304  		}
   305  		cr.Spec.Backup.Image = newVersion.BackupImage
   306  	}
   307  
   308  	if cr.Spec.PMM != nil && cr.Spec.PMM.Enabled && cr.Spec.PMM.Image != newVersion.PMMImage {
   309  		if cr.Status.PMM.Version == "" {
   310  			log.Info("set PMM version to " + newVersion.PMMVersion)
   311  		} else {
   312  			log.Info("update PMM version", "old version", cr.Status.PMM.Version, "new version", newVersion.PMMVersion)
   313  		}
   314  		cr.Spec.PMM.Image = newVersion.PMMImage
   315  	}
   316  
   317  	if cr.Spec.ProxySQLEnabled() && cr.Spec.ProxySQL.Image != newVersion.ProxySqlImage {
   318  		if cr.Status.ProxySQL.Version == "" {
   319  			log.Info("set ProxySQL version to " + newVersion.ProxySqlVersion)
   320  		} else {
   321  			log.Info("update ProxySQL version", "old version", cr.Status.ProxySQL.Version, "new version", newVersion.ProxySqlVersion)
   322  		}
   323  		cr.Spec.ProxySQL.Image = newVersion.ProxySqlImage
   324  	}
   325  
   326  	if cr.Spec.HAProxyEnabled() && cr.Spec.HAProxy.Image != newVersion.HAProxyImage {
   327  		if cr.Status.HAProxy.Version == "" {
   328  			log.Info("set HAProxy version to " + newVersion.HAProxyVersion)
   329  		} else {
   330  			log.Info("update HAProxy version", "old version", cr.Status.HAProxy.Version, "new version", newVersion.HAProxyVersion)
   331  		}
   332  		cr.Spec.HAProxy.Image = newVersion.HAProxyImage
   333  	}
   334  
   335  	if cr.Spec.LogCollector != nil && cr.Spec.LogCollector.Enabled && cr.Spec.LogCollector.Image != newVersion.LogCollectorImage {
   336  		if cr.Status.LogCollector.Version == "" {
   337  			log.Info("set LogCollector version to " + newVersion.LogCollectorVersion)
   338  		} else {
   339  			log.Info("update LogCollector version", "old version", cr.Status.LogCollector.Version, "new version", newVersion.LogCollectorVersion)
   340  		}
   341  		cr.Spec.LogCollector.Image = newVersion.LogCollectorImage
   342  	}
   343  
   344  	err = r.client.Patch(context.Background(), cr.DeepCopy(), patch)
   345  	if err != nil {
   346  		return errors.Wrap(err, "failed to update CR")
   347  	}
   348  
   349  	cr.Status.ProxySQL.Version = newVersion.ProxySqlVersion
   350  	cr.Status.HAProxy.Version = newVersion.HAProxyVersion
   351  	cr.Status.PMM.Version = newVersion.PMMVersion
   352  	cr.Status.Backup.Version = newVersion.BackupVersion
   353  	cr.Status.PXC.Version = newVersion.PXCVersion
   354  	cr.Status.PXC.Image = newVersion.PXCImage
   355  	cr.Status.LogCollector.Version = newVersion.LogCollectorVersion
   356  
   357  	err = k8sretry.RetryOnConflict(k8sretry.DefaultRetry, func() error {
   358  		localCr := &apiv1.PerconaXtraDBCluster{}
   359  
   360  		err := r.client.Get(ctx, types.NamespacedName{Name: cr.Name, Namespace: cr.Namespace}, localCr)
   361  		if err != nil {
   362  			return err
   363  		}
   364  
   365  		localCr.Status = cr.Status
   366  
   367  		return r.client.Status().Update(ctx, localCr)
   368  	})
   369  	if err != nil {
   370  		return errors.Wrap(err, "failed to update CR status")
   371  	}
   372  
   373  	time.Sleep(1 * time.Second)
   374  
   375  	return nil
   376  }
   377  
   378  func (r *ReconcilePerconaXtraDBCluster) mysqlVersion(ctx context.Context, cr *apiv1.PerconaXtraDBCluster, sfs apiv1.StatefulApp) (string, error) {
   379  	log := logf.FromContext(ctx)
   380  
   381  	if cr.Status.PXC.Ready < 1 {
   382  		return "", versionNotReadyErr
   383  	}
   384  
   385  	if cr.Status.ObservedGeneration != cr.ObjectMeta.Generation {
   386  		return "", versionNotReadyErr
   387  	}
   388  
   389  	if cr.Status.PXC.Image == cr.Spec.PXC.Image {
   390  		return "", versionNotReadyErr
   391  	}
   392  
   393  	upgradeInProgress, err := r.upgradeInProgress(cr, "pxc")
   394  	if err != nil {
   395  		return "", errors.Wrap(err, "check pxc upgrade progress")
   396  	}
   397  	if upgradeInProgress {
   398  		return "", versionNotReadyErr
   399  	}
   400  
   401  	list := corev1.PodList{}
   402  	if err := r.client.List(ctx,
   403  		&list,
   404  		&client.ListOptions{
   405  			Namespace:     sfs.StatefulSet().Namespace,
   406  			LabelSelector: labels.SelectorFromSet(sfs.Labels()),
   407  		},
   408  	); err != nil {
   409  		return "", errors.Wrap(err, "get pod list")
   410  	}
   411  
   412  	port := int32(3306)
   413  	secrets := cr.Spec.SecretsName
   414  	if cr.CompareVersionWith("1.6.0") >= 0 {
   415  		port = int32(33062)
   416  		secrets = "internal-" + cr.Name
   417  	}
   418  
   419  	for _, pod := range list.Items {
   420  		if !isPodReady(pod) {
   421  			continue
   422  		}
   423  
   424  		database, err := queries.New(r.client, cr.Namespace, secrets, users.Root, pod.Name+"."+cr.Name+"-pxc."+cr.Namespace, port, cr.Spec.PXC.ReadinessProbes.TimeoutSeconds)
   425  		if err != nil {
   426  			log.Error(err, "failed to create db instance")
   427  			continue
   428  		}
   429  
   430  		defer database.Close()
   431  
   432  		version, err := database.Version()
   433  		if err != nil {
   434  			log.Error(err, "failed to get pxc version")
   435  			continue
   436  		}
   437  
   438  		return version, nil
   439  	}
   440  
   441  	return "", errors.New("failed to reach any pod")
   442  }
   443  
   444  func (r *ReconcilePerconaXtraDBCluster) fetchVersionFromPXC(ctx context.Context, cr *apiv1.PerconaXtraDBCluster, sfs apiv1.StatefulApp) error {
   445  	log := logf.FromContext(ctx)
   446  
   447  	if cr.Status.PXC.Status != apiv1.AppStateReady {
   448  		return nil
   449  	}
   450  
   451  	version, err := r.mysqlVersion(ctx, cr, sfs)
   452  	if err != nil {
   453  		if errors.Is(err, versionNotReadyErr) {
   454  			return nil
   455  		}
   456  
   457  		return err
   458  	}
   459  
   460  	cr.Status.PXC.Version = version
   461  	cr.Status.PXC.Image = cr.Spec.PXC.Image
   462  
   463  	log.Info("update PXC version (fetched from db)", "new version", version)
   464  	err = k8sretry.RetryOnConflict(k8sretry.DefaultRetry, func() error {
   465  		localCr := &apiv1.PerconaXtraDBCluster{}
   466  
   467  		err := r.client.Get(ctx, types.NamespacedName{Name: cr.Name, Namespace: cr.Namespace}, localCr)
   468  		if err != nil {
   469  			return err
   470  		}
   471  
   472  		localCr.Status = cr.Status
   473  
   474  		return r.client.Status().Update(ctx, localCr)
   475  	})
   476  	if err != nil {
   477  		return errors.Wrap(err, "failed to update CR")
   478  	}
   479  	return nil
   480  }
   481  
   482  func telemetryEnabled() bool {
   483  	value, ok := os.LookupEnv("DISABLE_TELEMETRY")
   484  	if ok {
   485  		return value != "true"
   486  	}
   487  	return true
   488  }
   489  
   490  func versionUpgradeEnabled(cr *apiv1.PerconaXtraDBCluster) bool {
   491  	return strings.ToLower(cr.Spec.UpgradeOptions.Apply) != apiv1.UpgradeStrategyNever &&
   492  		strings.ToLower(cr.Spec.UpgradeOptions.Apply) != apiv1.UpgradeStrategyDisabled
   493  }
   494  
   495  // setCRVersion sets operator version of PerconaXtraDBCluster.
   496  // The new (semver-matching) version is determined by the CR's crVersion field.
   497  // If the crVersion is an empty string, it sets the current operator version.
   498  func (r *ReconcilePerconaXtraDBCluster) setCRVersion(ctx context.Context, cr *apiv1.PerconaXtraDBCluster) error {
   499  	if len(cr.Spec.CRVersion) > 0 {
   500  		return nil
   501  	}
   502  
   503  	orig := cr.DeepCopy()
   504  	cr.Spec.CRVersion = version.Version
   505  
   506  	if err := r.client.Patch(ctx, cr, client.MergeFrom(orig)); err != nil {
   507  		return errors.Wrap(err, "patch CR")
   508  	}
   509  
   510  	logf.FromContext(ctx).Info("Set CR version", "version", cr.Spec.CRVersion)
   511  
   512  	return nil
   513  }