github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/gossip/convergence_test.go (about) 1 // Copyright 2014 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package gossip_test 12 13 import ( 14 "context" 15 "testing" 16 17 "github.com/cockroachdb/cockroach/pkg/config/zonepb" 18 "github.com/cockroachdb/cockroach/pkg/gossip/simulation" 19 "github.com/cockroachdb/cockroach/pkg/testutils" 20 "github.com/cockroachdb/cockroach/pkg/util/leaktest" 21 "github.com/cockroachdb/cockroach/pkg/util/log" 22 "github.com/cockroachdb/cockroach/pkg/util/stop" 23 ) 24 25 // The tests in this package have fairly small cluster sizes for the sake of 26 // not taking too long to run when run as part of the normal unit tests. If 27 // you're testing out gossip network behavior, you may find it useful to 28 // increase the network size for these tests (adjusting the max thresholds 29 // accordingly) and see how things behave. 30 const ( 31 testConvergenceSize = 10 32 testReachesEquilibriumSize = 24 33 ) 34 35 func connectionsRefused(network *simulation.Network) int64 { 36 var connsRefused int64 37 for _, node := range network.Nodes { 38 connsRefused += node.Gossip.GetNodeMetrics().ConnectionsRefused.Count() 39 } 40 return connsRefused 41 } 42 43 // TestConvergence verifies that a node gossip network converges within 44 // a fixed number of simulation cycles. It's really difficult to 45 // determine the right number for cycles because different things can 46 // happen during a single cycle, depending on how much CPU time is 47 // available. Eliminating this variability by getting more 48 // synchronization primitives in place for the simulation is possible, 49 // though two attempts so far have introduced more complexity into the 50 // actual production gossip code than seems worthwhile for a unittest. 51 // As such, the thresholds are drastically higher than is normally needed. 52 // 53 // As of Jan 2017, this normally takes ~12 cycles and 8-12 refused connections. 54 func TestConvergence(t *testing.T) { 55 defer leaktest.AfterTest(t)() 56 if testutils.NightlyStress() { 57 t.Skip() 58 } 59 60 stopper := stop.NewStopper() 61 defer stopper.Stop(context.Background()) 62 63 network := simulation.NewNetwork(stopper, testConvergenceSize, true, zonepb.DefaultZoneConfigRef()) 64 65 const maxCycles = 100 66 if connectedCycle := network.RunUntilFullyConnected(); connectedCycle > maxCycles { 67 log.Warningf(context.Background(), "expected a fully-connected network within %d cycles; took %d", 68 maxCycles, connectedCycle) 69 } 70 71 const maxConnsRefused = 50 72 if connsRefused := connectionsRefused(network); connsRefused > maxConnsRefused { 73 log.Warningf(context.Background(), 74 "expected network to fully connect with <= %d connections refused; took %d", 75 maxConnsRefused, connsRefused) 76 } 77 } 78 79 // TestNetworkReachesEquilibrium ensures that the gossip network stops bouncing 80 // refused connections around after a while and settles down. 81 // As mentioned in the comment for TestConvergence, there is a large amount of 82 // variability in how much gets done in each network cycle, and thus we have 83 // to set thresholds that are drastically higher than is needed in the normal 84 // case. 85 // 86 // As of Jan 2017, this normally takes 8-9 cycles and 50-60 refused connections. 87 func TestNetworkReachesEquilibrium(t *testing.T) { 88 defer leaktest.AfterTest(t)() 89 if testutils.NightlyStress() { 90 t.Skip() 91 } 92 93 stopper := stop.NewStopper() 94 defer stopper.Stop(context.Background()) 95 96 network := simulation.NewNetwork(stopper, testReachesEquilibriumSize, true, zonepb.DefaultZoneConfigRef()) 97 98 var connsRefused int64 99 var cyclesWithoutChange int 100 var numCycles int 101 network.SimulateNetwork(func(cycle int, network *simulation.Network) bool { 102 numCycles = cycle 103 newConnsRefused := connectionsRefused(network) 104 if newConnsRefused > connsRefused { 105 connsRefused = newConnsRefused 106 cyclesWithoutChange = 0 107 } else { 108 cyclesWithoutChange++ 109 } 110 if cycle%5 == 0 { 111 log.Infof(context.Background(), "cycle: %d, cyclesWithoutChange: %d, fullyConnected: %v", 112 cycle, cyclesWithoutChange, network.IsNetworkConnected()) 113 } 114 return cyclesWithoutChange < 5 115 }) 116 117 const maxCycles = 200 118 if numCycles > maxCycles { 119 log.Warningf(context.Background(), "expected a non-thrashing network within %d cycles; took %d", 120 maxCycles, numCycles) 121 } 122 123 const maxConnsRefused = 500 124 if connsRefused > maxConnsRefused { 125 log.Warningf(context.Background(), 126 "expected thrashing to die down with <= %d connections refused; took %d", 127 maxConnsRefused, connsRefused) 128 } 129 }