github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/server/version_cluster_test.go

// Copyright 2017 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package server_test

import (
	"context"
	gosql "database/sql"
	"fmt"
	"path/filepath"
	"strconv"
	"sync/atomic"
	"testing"

	"github.com/cockroachdb/cockroach/pkg/base"
	"github.com/cockroachdb/cockroach/pkg/clusterversion"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/server"
	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
	"github.com/cockroachdb/cockroach/pkg/testutils"
	"github.com/cockroachdb/cockroach/pkg/testutils/testcluster"
	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/cockroach/pkg/util/protoutil"
	"github.com/cockroachdb/errors"
	"github.com/stretchr/testify/require"
)

type testClusterWithHelpers struct {
	*testing.T
	*testcluster.TestCluster
	args func() map[int]base.TestServerArgs
}

func (th *testClusterWithHelpers) getVersionFromShow(i int) string {
	var version string
	if err := th.ServerConn(i).QueryRow("SHOW CLUSTER SETTING version").Scan(&version); err != nil {
		th.Fatalf("%d: %s", i, err)
	}
	return version
}

func (th *testClusterWithHelpers) getVersionFromSelect(i int) string {
	var version string
	if err := th.ServerConn(i).QueryRow("SELECT value FROM system.settings WHERE name = 'version'").Scan(&version); err != nil {
		if errors.Is(err, gosql.ErrNoRows) {
			return ""
		}
		th.Fatalf("%d: %s (%T)", i, err, err)
	}
	var v clusterversion.ClusterVersion
	if err := protoutil.Unmarshal([]byte(version), &v); err != nil {
		th.Fatalf("%d: %s", i, err)
	}
	return v.Version.String()
}

func (th *testClusterWithHelpers) setVersion(i int, version string) error {
	_, err := th.ServerConn(i).Exec("SET CLUSTER SETTING version = $1", version)
	return err
}

func (th *testClusterWithHelpers) mustSetVersion(i int, version string) {
	th.Helper()
	if err := th.setVersion(i, version); err != nil {
		th.Fatalf("%d: %s", i, err)
	}
}

func (th *testClusterWithHelpers) setDowngrade(i int, version string) error {
	_, err := th.ServerConn(i).Exec("SET CLUSTER SETTING cluster.preserve_downgrade_option = $1", version)
	return err
}

func (th *testClusterWithHelpers) resetDowngrade(i int) error {
	_, err := th.ServerConn(i).Exec("RESET CLUSTER SETTING cluster.preserve_downgrade_option")
	return err
}

// Set up a mixed cluster with the given testing knobs (which can carry the
// initial bootstrap version) and len(versions) servers that each run at
// binary version == v[0] and minimum supported version == v[1] (i.e. they
// identify as a binary that can run with at least a v[1] mixed cluster and is
// itself v[0]). A directory can optionally be passed in.
func setupMixedCluster(
	t *testing.T, knobs base.TestingKnobs, versions [][2]string, dir string,
) testClusterWithHelpers {

	twh := testClusterWithHelpers{
		T: t,
		args: func() map[int]base.TestServerArgs {
			serverArgsPerNode := map[int]base.TestServerArgs{}
			for i, v := range versions {
				v0, v1 := roachpb.MustParseVersion(v[0]), roachpb.MustParseVersion(v[1])
				st := cluster.MakeTestingClusterSettingsWithVersions(v0, v1, false /* initializeVersion */)
				args := base.TestServerArgs{
					Settings: st,
					Knobs:    knobs,
				}
				if dir != "" {
					args.StoreSpecs = []base.StoreSpec{{Path: filepath.Join(dir, strconv.Itoa(i))}}
				}
				serverArgsPerNode[i] = args
			}
			return serverArgsPerNode
		}}

	tc := testcluster.StartTestCluster(t, len(versions), base.TestClusterArgs{
		ReplicationMode:   base.ReplicationManual, // speeds up test
		ServerArgsPerNode: twh.args(),
	})

	// We simulate crashes using this cluster, and having this enabled (which is
	// a default migration) causes leaktest to complain.
	if _, err := tc.ServerConn(0).Exec("SET CLUSTER SETTING diagnostics.reporting.enabled = 'false'"); err != nil {
		t.Fatal(err)
	}

	twh.TestCluster = tc
	return twh
}

// prev returns the previous version of the given version.
// e.g. prev(20.1) = 19.2, prev(19.2) = 19.1, prev(19.1) = 2.1,
// prev(2.1) = 2.0, prev(2.0) = 1.0, prev(2.1-5) = 2.1.
func prev(version roachpb.Version) roachpb.Version {
	if version.Unstable != 0 {
		return roachpb.Version{Major: version.Major, Minor: version.Minor}
	}

	v19_1 := roachpb.Version{Major: 19, Minor: 1}

	if v19_1.Less(version) {
		if version.Minor > 1 {
			return roachpb.Version{Major: version.Major, Minor: version.Minor - 1}
		}
		// Here we assume that there's going to only be 2 releases per year.
		// Otherwise we'd need to keep some history of what releases we've had.
		return roachpb.Version{Major: version.Major - 1, Minor: 2}
	}

	if version == v19_1 {
		return roachpb.Version{Major: 2, Minor: 1}
	}

	// Logic for versions below 19.1.

	if version.Major > 2 {
		log.Fatalf(context.Background(), "can't compute previous version for %s", version)
	}

	if version.Minor != 0 {
		return roachpb.Version{Major: version.Major}
	}
	// version will be at least 2.0-X, so it's safe to set new Major to be version.Major-1.
	return roachpb.Version{Major: version.Major - 1}
}
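
// The test below is a hypothetical sketch, not part of the original file: it
// simply spells out the examples from prev's comment above as assertions,
// assuming roachpb.MustParseVersion accepts strings such as "20.1" and
// "2.1-5" (as used elsewhere in this file).
func TestPrevVersionExamples(t *testing.T) {
	defer leaktest.AfterTest(t)()

	for _, tc := range []struct {
		in, want string
	}{
		{"20.1", "19.2"},
		{"19.2", "19.1"},
		{"19.1", "2.1"},
		{"2.1", "2.0"},
		{"2.0", "1.0"},
		{"2.1-5", "2.1"},
	} {
		// Versions with only comparable fields can be compared directly.
		if got := prev(roachpb.MustParseVersion(tc.in)); got != roachpb.MustParseVersion(tc.want) {
			t.Errorf("prev(%s) = %s, want %s", tc.in, got, tc.want)
		}
	}
}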

func TestClusterVersionPersistedOnJoin(t *testing.T) {
	defer leaktest.AfterTest(t)()

	var newVersion = clusterversion.TestingBinaryVersion
	var oldVersion = prev(newVersion)

	// Start three nodes whose minimum supported version is oldVersion and whose
	// self-declared binary version is newVersion, in a cluster running at the
	// new version (i.e. a very regular setup). We want to check that after
	// joining the cluster, the last two servers persist the new version (and
	// not the old one).
	versions := [][2]string{
		{newVersion.String(), oldVersion.String()},
		{newVersion.String(), oldVersion.String()},
		{newVersion.String(), oldVersion.String()},
	}

	knobs := base.TestingKnobs{
		Server: &server.TestingKnobs{
			DisableAutomaticVersionUpgrade: 1,
		},
	}

	ctx := context.Background()
	dir, finish := testutils.TempDir(t)
	defer finish()
	tc := setupMixedCluster(t, knobs, versions, dir)
	defer tc.TestCluster.Stopper().Stop(ctx)

	for i := 0; i < len(tc.TestCluster.Servers); i++ {
		for _, engine := range tc.TestCluster.Servers[i].Engines() {
			cv, err := kvserver.ReadClusterVersion(ctx, engine)
			if err != nil {
				t.Fatal(err)
			}
			if cv.Version != newVersion {
				t.Fatalf("n%d: expected version %v, got %v", i+1, newVersion, cv)
			}
		}
	}
}

func TestClusterVersionUpgrade(t *testing.T) {
	defer leaktest.AfterTest(t)()
	ctx := context.Background()

	var newVersion = clusterversion.TestingBinaryVersion
	var oldVersion = prev(newVersion)

	knobs := base.TestingKnobs{
		Server: &server.TestingKnobs{
			BootstrapVersionOverride:       oldVersion,
			DisableAutomaticVersionUpgrade: 1,
		},
	}

	rawTC := testcluster.StartTestCluster(t, 3, base.TestClusterArgs{
		ReplicationMode: base.ReplicationManual, // speeds up test
		ServerArgs: base.TestServerArgs{
			Knobs: knobs,
		},
	})
	defer rawTC.Stopper().Stop(ctx)
	tc := testClusterWithHelpers{
		T:           t,
		TestCluster: rawTC,
	}

	{
		// Regression test for the fix for this issue:
		// https://github.com/cockroachdb/cockroach/pull/39640#pullrequestreview-275532068
		//
		// This can be removed when VersionLearnerReplicas is always-on.
		k := tc.ScratchRange(t)
		tc.AddReplicasOrFatal(t, k, tc.Target(2))
		_, err := tc.RemoveReplicas(k, tc.Target(2))
		require.NoError(t, err)
	}

	// Set CLUSTER SETTING cluster.preserve_downgrade_option to oldVersion to prevent upgrade.
	if err := tc.setDowngrade(0, oldVersion.String()); err != nil {
		t.Fatalf("error setting CLUSTER SETTING cluster.preserve_downgrade_option: %s", err)
	}
	atomic.StoreInt32(&knobs.Server.(*server.TestingKnobs).DisableAutomaticVersionUpgrade, 0)

	// Check that the cluster version is still oldVersion.
	curVersion := tc.getVersionFromSelect(0)
	if curVersion != oldVersion.String() {
		t.Fatalf("cluster version should still be %s, but got %s", oldVersion, curVersion)
	}

	// Reset cluster.preserve_downgrade_option to enable auto upgrade.
	if err := tc.resetDowngrade(0); err != nil {
		t.Fatalf("error resetting CLUSTER SETTING cluster.preserve_downgrade_option: %s", err)
	}

	// Check that the cluster version is bumped to newVersion.
	testutils.SucceedsSoon(t, func() error {
		if version := tc.getVersionFromSelect(0); version != newVersion.String() {
			return errors.Errorf("cluster version is still %s, should be %s", version, newVersion)
		}
		return nil
	})
	curVersion = tc.getVersionFromSelect(0)
	isNoopUpdate := curVersion == newVersion.String()

	testutils.SucceedsSoon(t, func() error {
		for i := 0; i < tc.NumServers(); i++ {
			st := tc.Servers[i].ClusterSettings()
			v := st.Version.ActiveVersion(ctx)
			wantActive := isNoopUpdate
			if isActive := v.IsActiveVersion(newVersion); isActive != wantActive {
				return errors.Errorf("%d: v%s active=%t (wanted %t)", i, newVersion, isActive, wantActive)
			}

			if tableV, curV := tc.getVersionFromSelect(i), v.String(); tableV != curV {
				return errors.Errorf("%d: read v%s from table, v%s from setting", i, tableV, curV)
			}
		}
		return nil
	})

	exp := newVersion.String()

	// Read the versions from the table from each node. Note that under the
	// hood, everything goes to the lease holder and so it's pretty much
	// guaranteed that they all read the same, but it doesn't hurt to check.
	testutils.SucceedsSoon(t, func() error {
		for i := 0; i < tc.NumServers(); i++ {
			if version := tc.getVersionFromSelect(i); version != exp {
				return errors.Errorf("%d: incorrect version %q (wanted %s)", i, version, exp)
			}
			if version := tc.getVersionFromShow(i); version != exp {
				return errors.Errorf("%d: incorrect version %s (wanted %s)", i, version, exp)
			}
		}
		return nil
	})

	// Now check the Settings.Version variable. That is the tricky one for which
	// we "hold back" a gossip update until we've written to the engines. We may
	// have to wait a bit until we see the new version here, even though it's
	// already in the table.
	testutils.SucceedsSoon(t, func() error {
		for i := 0; i < tc.NumServers(); i++ {
			vers := tc.Servers[i].ClusterSettings().Version.ActiveVersion(ctx)
			if v := vers.String(); v == curVersion {
				if isNoopUpdate {
					continue
				}
				return errors.Errorf("%d: still waiting for %s (now at %s)", i, exp, v)
			} else if v != exp {
				t.Fatalf("%d: should never see version %s (wanted %s)", i, v, exp)
			}
		}
		return nil
	})

	// Since the wrapped version setting exposes the new versions, it must
	// definitely be present on all stores on the first try.
	if err := tc.Servers[1].GetStores().(*kvserver.Stores).VisitStores(func(s *kvserver.Store) error {
		cv, err := kvserver.ReadVersionFromEngineOrZero(ctx, s.Engine())
		if err != nil {
			return err
		}
		if act := cv.Version.String(); act != exp {
			t.Fatalf("%s: %s persisted, but should be %s", s, act, exp)
		}
		return nil
	}); err != nil {
		t.Fatal(err)
	}
}

// Test that, after cluster bootstrap, the different ways of getting the cluster
// version all agree.
func TestAllVersionsAgree(t *testing.T) {
	defer leaktest.AfterTest(t)()
	ctx := context.Background()

	tcRaw := testcluster.StartTestCluster(t, 3, base.TestClusterArgs{})
	defer tcRaw.Stopper().Stop(ctx)
	tc := testClusterWithHelpers{
		T:           t,
		TestCluster: tcRaw,
	}

	exp := clusterversion.TestingBinaryVersion.String()

	// The node bootstrapping the cluster starts at TestingBinaryVersion, the
	// others start at TestingBinaryMinSupportedVersion and it takes them a
	// gossip update to get to TestingBinaryVersion. Hence, we loop until that
	// gossip comes.
	testutils.SucceedsSoon(tc, func() error {
		for i := 0; i < tc.NumServers(); i++ {
			if version := tc.Servers[i].ClusterSettings().Version.ActiveVersion(ctx); version.String() != exp {
				return fmt.Errorf("%d: incorrect version %s (wanted %s)", i, version, exp)
			}
			if version := tc.getVersionFromShow(i); version != exp {
				return fmt.Errorf("%d: incorrect version %s (wanted %s)", i, version, exp)
			}
			if version := tc.getVersionFromSelect(i); version != exp {
				return fmt.Errorf("%d: incorrect version %q (wanted %s)", i, version, exp)
			}
		}
		return nil
	})
}

// v0v1 returns two versions v0 and v1 which correspond to adjacent releases.
// v1 will equal TestingBinaryMinSupportedVersion to avoid rot in tests using
// this (as we retire old versions).
func v0v1() (roachpb.Version, roachpb.Version) {
	v1 := clusterversion.TestingBinaryMinSupportedVersion
	v0 := clusterversion.TestingBinaryMinSupportedVersion
	if v0.Minor > 0 {
		v0.Minor--
	} else {
		v0.Major--
	}
	return v0, v1
}

func TestClusterVersionMixedVersionTooOld(t *testing.T) {
	defer leaktest.AfterTest(t)()
	ctx := context.Background()

	// Prevent node crashes from generating several megabytes of stacks when
	// GOTRACEBACK=all, as it is on CI.
	defer log.DisableTracebacks()()

	exits := make(chan int, 100)

	log.SetExitFunc(true /* hideStack */, func(i int) { exits <- i })
	defer log.ResetExitFunc()

	v0, v1 := v0v1()
	v0s := v0.String()
	v1s := v1.String()

	// Three nodes at v1 and a fourth one at v0, but all operating at v0.
	versions := [][2]string{
		{v1s, v0s},
		{v1s, v0s},
		{v1s, v0s},
		{v0s, v0s},
	}

	// Start by running v0.
	knobs := base.TestingKnobs{
		Server: &server.TestingKnobs{
			DisableAutomaticVersionUpgrade: 1,
			BootstrapVersionOverride:       v0,
		},
	}
	tc := setupMixedCluster(t, knobs, versions, "")
	defer tc.Stopper().Stop(ctx)

	exp := v1s

	// The last node refuses to perform an upgrade that would risk its own life.
	if err := tc.setVersion(len(versions)-1, exp); !testutils.IsError(err,
		fmt.Sprintf("cannot upgrade to %s: node running %s", v1s, v0s),
	) {
		t.Fatal(err)
	}

	// The other nodes are less careful.
	tc.mustSetVersion(0, exp)

	<-exits // wait for the fourth node to die

	// Check that we can still talk to the first three nodes.
	for i := 0; i < tc.NumServers()-1; i++ {
		testutils.SucceedsSoon(tc, func() error {
			if version := tc.Servers[i].ClusterSettings().Version.ActiveVersion(ctx).String(); version != exp {
				return errors.Errorf("%d: incorrect version %s (wanted %s)", i, version, exp)
			}
			if version := tc.getVersionFromShow(i); version != exp {
				return errors.Errorf("%d: incorrect version %s (wanted %s)", i, version, exp)
			}
			return nil
		})
	}
}
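
// The sketch below is a hypothetical addition, not part of the original file:
// it only asserts the ordering that v0v1 is meant to provide, namely that v0
// strictly precedes v1 and that v1 tracks the binary's minimum supported
// version, without assuming any concrete release numbers.
func TestV0V1Ordering(t *testing.T) {
	defer leaktest.AfterTest(t)()

	v0, v1 := v0v1()
	if v1 != clusterversion.TestingBinaryMinSupportedVersion {
		t.Fatalf("expected v1 = %s to equal TestingBinaryMinSupportedVersion = %s",
			v1, clusterversion.TestingBinaryMinSupportedVersion)
	}
	if !v0.Less(v1) {
		t.Fatalf("expected v0 = %s to sort before v1 = %s", v0, v1)
	}
}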