github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/server/init.go

// Copyright 2017 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package server

import (
	"context"
	"fmt"

	"github.com/cockroachdb/cockroach/pkg/clusterversion"
	"github.com/cockroachdb/cockroach/pkg/config/zonepb"
	"github.com/cockroachdb/cockroach/pkg/gossip"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/server/serverpb"
	"github.com/cockroachdb/cockroach/pkg/settings"
	"github.com/cockroachdb/cockroach/pkg/storage"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/cockroach/pkg/util/stop"
	"github.com/cockroachdb/cockroach/pkg/util/syncutil"
	"github.com/cockroachdb/cockroach/pkg/util/uuid"
	"github.com/cockroachdb/errors"
)

// ErrClusterInitialized is reported when the Bootstrap RPC is run on
// a node that is already part of an initialized cluster.
var ErrClusterInitialized = fmt.Errorf("cluster has already been initialized")

// initServer handles the bootstrapping process. It is instantiated early in the
// server startup sequence to determine whether a NodeID and ClusterID are
// available (true if and only if an initialized store is present). If all
// engines are empty, either a new cluster needs to be started (via an incoming
// Bootstrap RPC) or an existing one joined. Either way, the goal is to learn a
// ClusterID and NodeID (and initialize at least one store). All of this
// subtlety is encapsulated by the initServer, which offers a primitive
// ServeAndWait() after which point the startup code can assume that the
// Node/ClusterIDs are known.
//
// TODO(tbg): at the time of writing, when joining an existing cluster for the
// first time, the initServer provides only the clusterID. Fix this by giving
// the initServer a *kv.DB that it can use to assign a NodeID and StoreID, and
// later by switching to the connect RPC (#32574).
type initServer struct {
	mu struct {
		syncutil.Mutex
		// If set, a Bootstrap() call is rejected with this error.
		rejectErr error
	}
	// The version at which to bootstrap the cluster in Bootstrap().
	bootstrapVersion roachpb.Version
	// The zone configs to bootstrap with.
	bootstrapZoneConfig, bootstrapSystemZoneConfig *zonepb.ZoneConfig
	// The state of the engines. This tells us whether the node is already
	// bootstrapped. The goal of the initServer is to complete this by the
	// time ServeAndWait returns.
	inspectState *initDiskState

	// If Bootstrap() succeeds, the resulting initState goes here (to be
	// consumed by ServeAndWait).
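	// The channel is buffered with capacity 1 (see setupInitServer), so the
	// Bootstrap handler can deposit the state without blocking even if
	// ServeAndWait has not yet reached its select.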
	bootstrapReqCh chan *initState
}

func setupInitServer(
	ctx context.Context,
	binaryVersion, binaryMinSupportedVersion roachpb.Version,
	bootstrapVersion roachpb.Version,
	bootstrapZoneConfig, bootstrapSystemZoneConfig *zonepb.ZoneConfig,
	engines []storage.Engine,
) (*initServer, error) {
	inspectState, err := inspectEngines(ctx, engines, binaryVersion, binaryMinSupportedVersion)
	if err != nil {
		return nil, err
	}

	s := &initServer{
		bootstrapReqCh: make(chan *initState, 1),

		inspectState:              inspectState,
		bootstrapVersion:          bootstrapVersion,
		bootstrapZoneConfig:       bootstrapZoneConfig,
		bootstrapSystemZoneConfig: bootstrapSystemZoneConfig,
	}

	if len(inspectState.initializedEngines) > 0 {
		// We have a NodeID/ClusterID, so don't allow bootstrap.
		s.mu.rejectErr = ErrClusterInitialized
	}

	return s, nil
}

// initDiskState contains the part of initState that is read from stable
// storage.
//
// TODO(tbg): the above is a lie in the case in which we join an existing
// cluster. In that case, the state returned from ServeAndWait will have the
// clusterID set from Gossip (and there will be no NodeID). The plan is to
// allocate the IDs in ServeAndWait itself eventually, at which point the
// lie disappears.
type initDiskState struct {
	// nodeID is zero if joining an existing cluster.
	//
	// TODO(tbg): see TODO above.
	nodeID roachpb.NodeID
	// All fields below are always set.
	clusterID          uuid.UUID
	clusterVersion     clusterversion.ClusterVersion
	initializedEngines []storage.Engine
	newEngines         []storage.Engine
}

// initState contains the cluster and node IDs as well as the stores, from which
// a CockroachDB server can be started up after ServeAndWait returns.
type initState struct {
	initDiskState
	// joined is true if this is a new node. Note that the initDiskState may
	// reflect the result of bootstrapping a new cluster, i.e. it is not true
	// that joined==true implies that the initDiskState shows no initialized
	// engines.
	//
	// This flag should only be used for logging and reporting. A newly
	// bootstrapped single-node cluster is functionally equivalent to one that
	// restarted; any decisions should be made on persisted data instead of
	// this flag.
	//
	// TODO(tbg): remove this bool. The Node can find out another way whether
	// it just joined or restarted.
	joined bool
	// bootstrapped is true if a new cluster was initialized. If this is true,
	// 'joined' above is also true. Usage of this field should follow that of
	// 'joined' as well.
	bootstrapped bool
}

// NeedsInit returns true if (and only if) none of the engines are initialized.
// In this case, server startup is blocked until either an initialized node
// is reached via Gossip, or this node itself is bootstrapped.
func (s *initServer) NeedsInit() bool {
	return len(s.inspectState.initializedEngines) == 0
}

// ServeAndWait waits until the server is ready to bootstrap. In the common case
// of restarting an existing node, this immediately returns. When starting with
// a blank slate (i.e. only empty engines), it waits for an incoming Bootstrap
// request or for Gossip to connect (whichever happens first).
//
// The returned initState may not reflect a bootstrapped cluster yet, but it
// is guaranteed to have a ClusterID set.
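// Note that in the Gossip path the returned state has no NodeID yet; the Node
// allocates one for itself later during startup.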
//
// This method must be called only once.
//
// TODO(tbg): give this a KV client and thus initialize at least one store in
// all cases.
func (s *initServer) ServeAndWait(
	ctx context.Context, stopper *stop.Stopper, sv *settings.Values, g *gossip.Gossip,
) (*initState, error) {
	if !s.NeedsInit() {
		// If already bootstrapped, return early.
		return &initState{
			initDiskState: *s.inspectState,
			joined:        false,
			bootstrapped:  false,
		}, nil
	}

	log.Info(ctx, "no stores bootstrapped and --join flag specified, awaiting "+
		"init command or join with an already initialized node.")

	select {
	case <-stopper.ShouldQuiesce():
		return nil, stop.ErrUnavailable
	case state := <-s.bootstrapReqCh:
		// Bootstrap() did its job. At this point, we know that the cluster
		// version will be bootstrapVersion (=state.clusterVersion.Version), but
		// the version setting does not know yet (it was initialized as
		// BinaryMinSupportedVersion because the engines were all
		// uninitialized). We *could* just let the server start, and it would
		// populate system.settings, which is then gossiped, and then the
		// callback would update the version, but we take this shortcut to avoid
		// having every freshly bootstrapped cluster spend time at an old
		// cluster version.
		if err := clusterversion.Initialize(ctx, state.clusterVersion.Version, sv); err != nil {
			return nil, err
		}

		log.Infof(ctx, "**** cluster %s has been created", state.clusterID)
		return state, nil
	case <-g.Connected:
		// Gossip connected, that is, we know a ClusterID. Due to the early
		// return above, we know that all of our engines are empty, i.e. we
		// don't have a NodeID yet (and the cluster version is the minimum we
		// support). Commence startup; the Node will realize it's short a NodeID
		// and will request one.
		//
		// TODO(tbg): use a kv.DB to get the NodeID and StoreIDs when necessary
		// and set everything up here. This will take the Node out of that
		// business entirely and means we'll need far fewer
		// NodeID/ClusterIDContainers. (It's also much simpler to think about.)
		// The RPC will also tell us a cluster version to use instead of the
		// lowest possible one (reducing the short amount of time until the
		// Gossip hook bumps the version); this doesn't fix anything but again,
		// is simpler to think about. A gotcha that may not immediately be
		// obvious is that we can never hope to have all stores initialized by
		// the time ServeAndWait returns. This is because *if this server is
		// already bootstrapped*, it might hold a replica of the range backing
		// the StoreID allocation counter, and letting this server start may be
		// necessary to restore quorum to that range. So in general, after this
		// TODO, we will always leave this method with *at least one* store
		// initialized, but not necessarily all. This is fine, since
		// initializing additional stores later is easy.
		clusterID, err := g.GetClusterID()
		if err != nil {
			return nil, err
		}
		s.inspectState.clusterID = clusterID
		return &initState{
			initDiskState: *s.inspectState,
			joined:        true,
			bootstrapped:  false,
		}, nil
	}
}

var errInternalBootstrapError = errors.New("unable to bootstrap due to internal error")

// Bootstrap implements the serverpb.Init service. Users set up a new
// CockroachDB cluster by calling this endpoint on *exactly one node*
// (retrying only on that node). Attempting to bootstrap a node that was
// already bootstrapped results in an error.
//
// NB: there is no protection against users erroneously bootstrapping multiple
// nodes. In that case, they end up with more than one cluster, and nodes
// panicking or refusing to connect to each other.
func (s *initServer) Bootstrap(
	ctx context.Context, _ *serverpb.BootstrapRequest,
) (*serverpb.BootstrapResponse, error) {
	// Bootstrap() only responds once. Everyone else gets an error, either
	// ErrClusterInitialized (in the success case) or errInternalBootstrapError.

	s.mu.Lock()
	defer s.mu.Unlock()

	if s.mu.rejectErr != nil {
		return nil, s.mu.rejectErr
	}

	state, err := s.tryBootstrap(ctx)
	if err != nil {
		log.Errorf(ctx, "bootstrap: %v", err)
		s.mu.rejectErr = errInternalBootstrapError
		return nil, s.mu.rejectErr
	}
	s.mu.rejectErr = ErrClusterInitialized
	s.bootstrapReqCh <- state
	return &serverpb.BootstrapResponse{}, nil
}

func (s *initServer) tryBootstrap(ctx context.Context) (*initState, error) {
	cv := clusterversion.ClusterVersion{Version: s.bootstrapVersion}
	if err := kvserver.WriteClusterVersionToEngines(ctx, s.inspectState.newEngines, cv); err != nil {
		return nil, err
	}
	return bootstrapCluster(
		ctx, s.inspectState.newEngines, s.bootstrapZoneConfig, s.bootstrapSystemZoneConfig,
	)
}

// DiskClusterVersion returns the cluster version synthesized from disk. This
// is always non-zero since it falls back to the BinaryMinSupportedVersion.
func (s *initServer) DiskClusterVersion() clusterversion.ClusterVersion {
	return s.inspectState.clusterVersion
}
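
// The sketch below is editorial and not part of the original file: it
// illustrates, under assumed plumbing, how a startup sequence might drive the
// initServer. The helper name runInitSketch and its signature are
// hypothetical; only setupInitServer, NeedsInit, ServeAndWait, and the
// initState fields are real symbols from this file. In the actual server, the
// initServer would also be registered with the gRPC server so that the
// Bootstrap RPC is reachable from `cockroach init`.
func runInitSketch(
	ctx context.Context,
	stopper *stop.Stopper,
	sv *settings.Values,
	g *gossip.Gossip,
	engines []storage.Engine,
	binaryVersion, binaryMinSupportedVersion, bootstrapVersion roachpb.Version,
	zoneCfg, sysZoneCfg *zonepb.ZoneConfig,
) (*initState, error) {
	// Inspect the engines and construct the init server. If any engine is
	// already initialized, Bootstrap() calls are rejected with
	// ErrClusterInitialized.
	initSrv, err := setupInitServer(
		ctx, binaryVersion, binaryMinSupportedVersion, bootstrapVersion,
		zoneCfg, sysZoneCfg, engines,
	)
	if err != nil {
		return nil, err
	}

	if initSrv.NeedsInit() {
		log.Info(ctx, "all engines are empty; waiting for bootstrap or gossip")
	}

	// Blocks until a ClusterID is known: immediately for a restarted node,
	// otherwise until a Bootstrap RPC arrives or Gossip connects.
	state, err := initSrv.ServeAndWait(ctx, stopper, sv, g)
	if err != nil {
		return nil, err
	}
	log.Infof(ctx, "ready with cluster %s (bootstrapped=%t)", state.clusterID, state.bootstrapped)
	return state, nil
}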