github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/server/server_update.go (about) 1 // Copyright 2018 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package server 12 13 import ( 14 "context" 15 "sync/atomic" 16 "time" 17 18 "github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb" 19 "github.com/cockroachdb/cockroach/pkg/security" 20 "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" 21 "github.com/cockroachdb/cockroach/pkg/sql/sqlbase" 22 "github.com/cockroachdb/cockroach/pkg/util/log" 23 "github.com/cockroachdb/cockroach/pkg/util/retry" 24 "github.com/cockroachdb/errors" 25 ) 26 27 // startAttemptUpgrade attempts to upgrade cluster version. 28 func (s *Server) startAttemptUpgrade(ctx context.Context) { 29 ctx, cancel := s.stopper.WithCancelOnQuiesce(ctx) 30 if err := s.stopper.RunAsyncTask(ctx, "auto-upgrade", func(ctx context.Context) { 31 defer cancel() 32 retryOpts := retry.Options{ 33 InitialBackoff: time.Second, 34 MaxBackoff: 30 * time.Second, 35 Multiplier: 2, 36 Closer: s.stopper.ShouldQuiesce(), 37 } 38 39 for r := retry.StartWithCtx(ctx, retryOpts); r.Next(); { 40 // Check if auto upgrade is disabled for test purposes. 41 if k := s.cfg.TestingKnobs.Server; k != nil { 42 upgradeTestingKnobs := k.(*TestingKnobs) 43 if disable := atomic.LoadInt32(&upgradeTestingKnobs.DisableAutomaticVersionUpgrade); disable == 1 { 44 log.Infof(ctx, "auto upgrade disabled by testing") 45 continue 46 } 47 } 48 49 // Check if we should upgrade cluster version, keep checking upgrade 50 // status, or stop attempting upgrade. 51 if quit, err := s.upgradeStatus(ctx); err != nil { 52 log.Infof(ctx, "failed attempt to upgrade cluster version, error: %s", err) 53 continue 54 } else if quit { 55 log.Info(ctx, "no need to upgrade, cluster already at the newest version") 56 return 57 } 58 59 upgradeRetryOpts := retry.Options{ 60 InitialBackoff: 5 * time.Second, 61 MaxBackoff: 10 * time.Second, 62 Multiplier: 2, 63 Closer: s.stopper.ShouldQuiesce(), 64 } 65 66 // Run the set cluster setting version statement and reset cluster setting 67 // `cluster.preserve_downgrade_option` statement in a transaction until 68 // success. 69 for ur := retry.StartWithCtx(ctx, upgradeRetryOpts); ur.Next(); { 70 if _, err := s.sqlServer.internalExecutor.ExecEx( 71 ctx, "set-version", nil, /* txn */ 72 sqlbase.InternalExecutorSessionDataOverride{User: security.RootUser}, 73 "SET CLUSTER SETTING version = crdb_internal.node_executable_version();", 74 ); err != nil { 75 log.Infof(ctx, "error when finalizing cluster version upgrade: %s", err) 76 } else { 77 log.Info(ctx, "successfully upgraded cluster version") 78 return 79 } 80 } 81 } 82 }); err != nil { 83 cancel() 84 log.Infof(ctx, "failed attempt to upgrade cluster version, error: %s", err) 85 } 86 } 87 88 // upgradeStatus lets the main checking loop know if we should do upgrade, 89 // keep checking upgrade status, or stop attempting upgrade. 90 // Return (true, nil) to indicate we want to stop attempting upgrade. 91 // Return (false, nil) to indicate we want to do the upgrade. 92 // Return (false, err) to indicate we want to keep checking upgrade status. 93 func (s *Server) upgradeStatus(ctx context.Context) (bool, error) { 94 // Check if all nodes are running at the newest version. 95 clusterVersion, err := s.clusterVersion(ctx) 96 if err != nil { 97 return false, err 98 } 99 100 nodesWithLiveness, err := s.status.nodesStatusWithLiveness(ctx) 101 if err != nil { 102 return false, err 103 } 104 105 var newVersion string 106 var notRunningErr error 107 for nodeID, st := range nodesWithLiveness { 108 if st.livenessStatus != kvserverpb.NodeLivenessStatus_LIVE && 109 st.livenessStatus != kvserverpb.NodeLivenessStatus_DECOMMISSIONING { 110 // We definitely won't be able to upgrade, but defer this error as 111 // we may find out that we are already at the latest version (the 112 // cluster may be up to date, but a node is down). 113 if notRunningErr == nil { 114 notRunningErr = errors.Errorf("node %d not running (%s), cannot determine version", nodeID, st.livenessStatus) 115 } 116 continue 117 } 118 119 version := st.NodeStatus.Desc.ServerVersion.String() 120 if newVersion == "" { 121 newVersion = version 122 } else if version != newVersion { 123 return false, errors.Newf("not all nodes are running the latest version yet (saw %s and %s)", newVersion, version) 124 } 125 } 126 127 if newVersion == "" { 128 return false, errors.Errorf("no live nodes found") 129 } 130 131 // Check if we really need to upgrade cluster version. 132 if newVersion == clusterVersion { 133 return true, nil 134 } 135 136 if notRunningErr != nil { 137 return false, notRunningErr 138 } 139 140 // Check if auto upgrade is enabled at current version. This is read from 141 // the KV store so that it's in effect on all nodes immediately following a 142 // SET CLUSTER SETTING. 143 datums, err := s.sqlServer.internalExecutor.QueryEx( 144 ctx, "read-downgrade", nil, /* txn */ 145 sqlbase.InternalExecutorSessionDataOverride{User: security.RootUser}, 146 "SELECT value FROM system.settings WHERE name = 'cluster.preserve_downgrade_option';", 147 ) 148 if err != nil { 149 return false, err 150 } 151 152 if len(datums) != 0 { 153 row := datums[0] 154 downgradeVersion := string(tree.MustBeDString(row[0])) 155 156 if clusterVersion == downgradeVersion { 157 return false, errors.Errorf("auto upgrade is disabled for current version: %s", clusterVersion) 158 } 159 } 160 161 return false, nil 162 } 163 164 // clusterVersion returns the current cluster version from the SQL subsystem 165 // (which returns the version from the KV store as opposed to the possibly 166 // lagging settings subsystem). 167 func (s *Server) clusterVersion(ctx context.Context) (string, error) { 168 datums, err := s.sqlServer.internalExecutor.QueryEx( 169 ctx, "show-version", nil, /* txn */ 170 sqlbase.InternalExecutorSessionDataOverride{User: security.RootUser}, 171 "SHOW CLUSTER SETTING version;", 172 ) 173 if err != nil { 174 return "", err 175 } 176 if len(datums) == 0 { 177 return "", errors.New("cluster version is not set") 178 } 179 row := datums[0] 180 clusterVersion := string(tree.MustBeDString(row[0])) 181 182 return clusterVersion, nil 183 }