github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/server/server_update.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package server
    12  
    13  import (
    14  	"context"
    15  	"sync/atomic"
    16  	"time"
    17  
    18  	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb"
    19  	"github.com/cockroachdb/cockroach/pkg/security"
    20  	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
    21  	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
    22  	"github.com/cockroachdb/cockroach/pkg/util/log"
    23  	"github.com/cockroachdb/cockroach/pkg/util/retry"
    24  	"github.com/cockroachdb/errors"
    25  )
    26  
    27  // startAttemptUpgrade attempts to upgrade cluster version.
    28  func (s *Server) startAttemptUpgrade(ctx context.Context) {
    29  	ctx, cancel := s.stopper.WithCancelOnQuiesce(ctx)
    30  	if err := s.stopper.RunAsyncTask(ctx, "auto-upgrade", func(ctx context.Context) {
    31  		defer cancel()
    32  		retryOpts := retry.Options{
    33  			InitialBackoff: time.Second,
    34  			MaxBackoff:     30 * time.Second,
    35  			Multiplier:     2,
    36  			Closer:         s.stopper.ShouldQuiesce(),
    37  		}
    38  
    39  		for r := retry.StartWithCtx(ctx, retryOpts); r.Next(); {
    40  			// Check if auto upgrade is disabled for test purposes.
    41  			if k := s.cfg.TestingKnobs.Server; k != nil {
    42  				upgradeTestingKnobs := k.(*TestingKnobs)
    43  				if disable := atomic.LoadInt32(&upgradeTestingKnobs.DisableAutomaticVersionUpgrade); disable == 1 {
    44  					log.Infof(ctx, "auto upgrade disabled by testing")
    45  					continue
    46  				}
    47  			}
    48  
    49  			// Check if we should upgrade cluster version, keep checking upgrade
    50  			// status, or stop attempting upgrade.
    51  			if quit, err := s.upgradeStatus(ctx); err != nil {
    52  				log.Infof(ctx, "failed attempt to upgrade cluster version, error: %s", err)
    53  				continue
    54  			} else if quit {
    55  				log.Info(ctx, "no need to upgrade, cluster already at the newest version")
    56  				return
    57  			}
    58  
    59  			upgradeRetryOpts := retry.Options{
    60  				InitialBackoff: 5 * time.Second,
    61  				MaxBackoff:     10 * time.Second,
    62  				Multiplier:     2,
    63  				Closer:         s.stopper.ShouldQuiesce(),
    64  			}
    65  
    66  			// Run the set cluster setting version statement and reset cluster setting
    67  			// `cluster.preserve_downgrade_option` statement in a transaction until
    68  			// success.
    69  			for ur := retry.StartWithCtx(ctx, upgradeRetryOpts); ur.Next(); {
    70  				if _, err := s.sqlServer.internalExecutor.ExecEx(
    71  					ctx, "set-version", nil, /* txn */
    72  					sqlbase.InternalExecutorSessionDataOverride{User: security.RootUser},
    73  					"SET CLUSTER SETTING version = crdb_internal.node_executable_version();",
    74  				); err != nil {
    75  					log.Infof(ctx, "error when finalizing cluster version upgrade: %s", err)
    76  				} else {
    77  					log.Info(ctx, "successfully upgraded cluster version")
    78  					return
    79  				}
    80  			}
    81  		}
    82  	}); err != nil {
    83  		cancel()
    84  		log.Infof(ctx, "failed attempt to upgrade cluster version, error: %s", err)
    85  	}
    86  }
    87  
    88  // upgradeStatus lets the main checking loop know if we should do upgrade,
    89  // keep checking upgrade status, or stop attempting upgrade.
    90  // Return (true, nil) to indicate we want to stop attempting upgrade.
    91  // Return (false, nil) to indicate we want to do the upgrade.
    92  // Return (false, err) to indicate we want to keep checking upgrade status.
    93  func (s *Server) upgradeStatus(ctx context.Context) (bool, error) {
    94  	// Check if all nodes are running at the newest version.
    95  	clusterVersion, err := s.clusterVersion(ctx)
    96  	if err != nil {
    97  		return false, err
    98  	}
    99  
   100  	nodesWithLiveness, err := s.status.nodesStatusWithLiveness(ctx)
   101  	if err != nil {
   102  		return false, err
   103  	}
   104  
   105  	var newVersion string
   106  	var notRunningErr error
   107  	for nodeID, st := range nodesWithLiveness {
   108  		if st.livenessStatus != kvserverpb.NodeLivenessStatus_LIVE &&
   109  			st.livenessStatus != kvserverpb.NodeLivenessStatus_DECOMMISSIONING {
   110  			// We definitely won't be able to upgrade, but defer this error as
   111  			// we may find out that we are already at the latest version (the
   112  			// cluster may be up to date, but a node is down).
   113  			if notRunningErr == nil {
   114  				notRunningErr = errors.Errorf("node %d not running (%s), cannot determine version", nodeID, st.livenessStatus)
   115  			}
   116  			continue
   117  		}
   118  
   119  		version := st.NodeStatus.Desc.ServerVersion.String()
   120  		if newVersion == "" {
   121  			newVersion = version
   122  		} else if version != newVersion {
   123  			return false, errors.Newf("not all nodes are running the latest version yet (saw %s and %s)", newVersion, version)
   124  		}
   125  	}
   126  
   127  	if newVersion == "" {
   128  		return false, errors.Errorf("no live nodes found")
   129  	}
   130  
   131  	// Check if we really need to upgrade cluster version.
   132  	if newVersion == clusterVersion {
   133  		return true, nil
   134  	}
   135  
   136  	if notRunningErr != nil {
   137  		return false, notRunningErr
   138  	}
   139  
   140  	// Check if auto upgrade is enabled at current version. This is read from
   141  	// the KV store so that it's in effect on all nodes immediately following a
   142  	// SET CLUSTER SETTING.
   143  	datums, err := s.sqlServer.internalExecutor.QueryEx(
   144  		ctx, "read-downgrade", nil, /* txn */
   145  		sqlbase.InternalExecutorSessionDataOverride{User: security.RootUser},
   146  		"SELECT value FROM system.settings WHERE name = 'cluster.preserve_downgrade_option';",
   147  	)
   148  	if err != nil {
   149  		return false, err
   150  	}
   151  
   152  	if len(datums) != 0 {
   153  		row := datums[0]
   154  		downgradeVersion := string(tree.MustBeDString(row[0]))
   155  
   156  		if clusterVersion == downgradeVersion {
   157  			return false, errors.Errorf("auto upgrade is disabled for current version: %s", clusterVersion)
   158  		}
   159  	}
   160  
   161  	return false, nil
   162  }
   163  
   164  // clusterVersion returns the current cluster version from the SQL subsystem
   165  // (which returns the version from the KV store as opposed to the possibly
   166  // lagging settings subsystem).
   167  func (s *Server) clusterVersion(ctx context.Context) (string, error) {
   168  	datums, err := s.sqlServer.internalExecutor.QueryEx(
   169  		ctx, "show-version", nil, /* txn */
   170  		sqlbase.InternalExecutorSessionDataOverride{User: security.RootUser},
   171  		"SHOW CLUSTER SETTING version;",
   172  	)
   173  	if err != nil {
   174  		return "", err
   175  	}
   176  	if len(datums) == 0 {
   177  		return "", errors.New("cluster version is not set")
   178  	}
   179  	row := datums[0]
   180  	clusterVersion := string(tree.MustBeDString(row[0]))
   181  
   182  	return clusterVersion, nil
   183  }