github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/server/init.go

// Copyright 2017 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package server

import (
	"context"
	"fmt"

	"github.com/cockroachdb/cockroach/pkg/clusterversion"
	"github.com/cockroachdb/cockroach/pkg/config/zonepb"
	"github.com/cockroachdb/cockroach/pkg/gossip"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/server/serverpb"
	"github.com/cockroachdb/cockroach/pkg/settings"
	"github.com/cockroachdb/cockroach/pkg/storage"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/cockroach/pkg/util/stop"
	"github.com/cockroachdb/cockroach/pkg/util/syncutil"
	"github.com/cockroachdb/cockroach/pkg/util/uuid"
	"github.com/cockroachdb/errors"
)

// ErrClusterInitialized is reported when the Bootstrap RPC is run on
// a node that is already part of an initialized cluster.
var ErrClusterInitialized = fmt.Errorf("cluster has already been initialized")

// initServer handles the bootstrapping process. It is instantiated early in the
// server startup sequence to determine whether a NodeID and ClusterID are
// available (true if and only if an initialized store is present). If all
// engines are empty, either a new cluster needs to be started (via an incoming
// Bootstrap RPC) or an existing one joined. Either way, the goal is to learn a
// ClusterID and NodeID (and initialize at least one store). All of this
// subtlety is encapsulated by the initServer, which offers a primitive
// ServeAndWait() after which point the startup code can assume that the
// Node/ClusterIDs are known.
//
// TODO(tbg): at the time of writing, when joining an existing cluster for the
// first time, the initServer provides only the clusterID. Fix this by giving
// the initServer a *kv.DB that it can use to assign a NodeID and StoreID, and
// later by switching to the connect RPC (#32574).
type initServer struct {
	mu struct {
		syncutil.Mutex
		// If set, a Bootstrap() call is rejected with this error.
		rejectErr error
	}
	// The version at which to bootstrap the cluster in Bootstrap().
	bootstrapVersion roachpb.Version
	// The zone configs to bootstrap with.
	bootstrapZoneConfig, bootstrapSystemZoneConfig *zonepb.ZoneConfig
	// The state of the engines. This tells us whether the node is already
	// bootstrapped. The goal of the initServer is to complete this by the
	// time ServeAndWait returns.
	inspectState *initDiskState

	// If Bootstrap() succeeds, the resulting initState will go here (to be
	// consumed by ServeAndWait).
	bootstrapReqCh chan *initState
}

func setupInitServer(
	ctx context.Context,
	binaryVersion, binaryMinSupportedVersion roachpb.Version,
	bootstrapVersion roachpb.Version,
	bootstrapZoneConfig, bootstrapSystemZoneConfig *zonepb.ZoneConfig,
	engines []storage.Engine,
) (*initServer, error) {
	inspectState, err := inspectEngines(ctx, engines, binaryVersion, binaryMinSupportedVersion)
	if err != nil {
		return nil, err
	}

	s := &initServer{
		bootstrapReqCh: make(chan *initState, 1),

		inspectState:              inspectState,
		bootstrapVersion:          bootstrapVersion,
		bootstrapZoneConfig:       bootstrapZoneConfig,
		bootstrapSystemZoneConfig: bootstrapSystemZoneConfig,
	}

	if len(inspectState.initializedEngines) > 0 {
		// We have a NodeID/ClusterID, so don't allow bootstrap.
		s.mu.rejectErr = ErrClusterInitialized
	}

	return s, nil
}
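
// exampleInitFlow is an illustrative sketch only; it is not called by the
// production startup code. It shows the intended call sequence around the
// init server, assuming the caller has already constructed the engines, a
// stopper, the settings, and a Gossip instance. In the real server the init
// server is additionally registered with the gRPC server so that an incoming
// Bootstrap RPC can reach it; that wiring is omitted here.
func exampleInitFlow(
	ctx context.Context,
	stopper *stop.Stopper,
	sv *settings.Values,
	g *gossip.Gossip,
	engines []storage.Engine,
	binaryVersion, binaryMinSupportedVersion roachpb.Version,
	defaultZoneConfig, defaultSystemZoneConfig *zonepb.ZoneConfig,
) (*initState, error) {
	// Inspect the engines and set up the init server. In this sketch a
	// brand-new node simply bootstraps at the binary version.
	initSrv, err := setupInitServer(
		ctx,
		binaryVersion, binaryMinSupportedVersion,
		binaryVersion, /* bootstrapVersion */
		defaultZoneConfig, defaultSystemZoneConfig,
		engines,
	)
	if err != nil {
		return nil, err
	}
	// Block until either the engines turn out to be initialized already, a
	// Bootstrap request arrives, or Gossip connects us to an existing cluster.
	return initSrv.ServeAndWait(ctx, stopper, sv, g)
}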

// initDiskState contains the part of initState that is read from stable
// storage.
//
// TODO(tbg): the above is a lie in the case in which we join an existing
// cluster. In that case, the state returned from ServeAndWait will have the
// clusterID set from Gossip (and there will be no NodeID). The plan is to
// allocate the IDs in ServeAndWait itself eventually, at which point the
// lie disappears.
type initDiskState struct {
	// nodeID is zero if joining an existing cluster.
	//
	// TODO(tbg): see TODO above.
	nodeID roachpb.NodeID
	// All fields below are always set.
	clusterID          uuid.UUID
	clusterVersion     clusterversion.ClusterVersion
	initializedEngines []storage.Engine
	newEngines         []storage.Engine
}

// initState contains the cluster and node IDs as well as the stores, from which
// a CockroachDB server can be started up after ServeAndWait returns.
type initState struct {
	initDiskState
	// joined is true if this is a new node. Note that the initDiskState may
	// reflect the result of bootstrapping a new cluster, i.e. it is not true
	// that joined==true implies that the initDiskState shows no initialized
	// engines.
	//
	// This flag should only be used for logging and reporting. A newly
	// bootstrapped single-node cluster is functionally equivalent to one that
	// restarted; any decisions should be made on persisted data instead of
	// this flag.
	//
	// TODO(tbg): remove this bool. The Node can find out another way whether
	// it just joined or restarted.
	joined bool
	// bootstrapped is true if a new cluster was initialized. If this is true,
	// 'joined' above is also true. Usage of this field should follow that of
	// 'joined' as well.
	bootstrapped bool
}

// NeedsInit returns true if (and only if) none of the engines are initialized.
// In this case, server startup is blocked until either an initialized node
// is reached via Gossip, or this node itself is bootstrapped.
func (s *initServer) NeedsInit() bool {
	return len(s.inspectState.initializedEngines) == 0
}

// ServeAndWait waits until the server is ready to bootstrap. In the common case
// of restarting an existing node, this immediately returns. When starting with
// a blank slate (i.e. only empty engines), it waits for an incoming Bootstrap
// request or for Gossip to connect (whichever happens earlier).
//
// The returned initState may not reflect a bootstrapped cluster yet, but it
// is guaranteed to have a ClusterID set.
//
// This method must be called only once.
//
// TODO(tbg): give this a KV client and thus initialize at least one store in
// all cases.
func (s *initServer) ServeAndWait(
	ctx context.Context, stopper *stop.Stopper, sv *settings.Values, g *gossip.Gossip,
) (*initState, error) {
	if !s.NeedsInit() {
		// If already bootstrapped, return early.
		return &initState{
			initDiskState: *s.inspectState,
			joined:        false,
			bootstrapped:  false,
		}, nil
	}

	log.Info(ctx, "no stores bootstrapped and --join flag specified, awaiting "+
		"init command or join with an already initialized node.")

	select {
	case <-stopper.ShouldQuiesce():
		return nil, stop.ErrUnavailable
	case state := <-s.bootstrapReqCh:
		// Bootstrap() did its job. At this point, we know that the cluster
		// version will be bootstrapVersion (=state.clusterVersion.Version), but
		// the version setting does not know yet (it was initialized as
		// BinaryMinSupportedVersion because the engines were all
		// uninitialized). We *could* just let the server start, and it would
		// populate system.settings, which is then gossiped, and then the
		// callback would update the version, but we take this shortcut to avoid
		// having every freshly bootstrapped cluster spend time at an old
		// cluster version.
		if err := clusterversion.Initialize(ctx, state.clusterVersion.Version, sv); err != nil {
			return nil, err
		}

		log.Infof(ctx, "**** cluster %s has been created", state.clusterID)
		return state, nil
	case <-g.Connected:
		// Gossip connected, that is, we know a ClusterID. Due to the early
		// return above, we know that all of our engines are empty, i.e. we
		// don't have a NodeID yet (and the cluster version is the minimum we
		// support). Commence startup; the Node will realize it's short a NodeID
		// and will request one.
		//
		// TODO(tbg): use a kv.DB to get NodeID and StoreIDs when necessary and
		// set everything up here. This will take the Node out of that business
		// entirely and means we'll need many fewer NodeID/ClusterIDContainers.
		// (It's also so much simpler to think about). The RPC will also tell us
		// a cluster version to use instead of the lowest possible one (reducing
		// the short amount of time until the Gossip hook bumps the version);
		// this doesn't fix anything but again, is simpler to think about. A
		// gotcha that may not immediately be obvious is that we can never hope
		// to have all stores initialized by the time ServeAndWait returns. This
		// is because *if this server is already bootstrapped*, it might hold a
		// replica of the range backing the StoreID allocating counter, and
		// letting this server start may be necessary to restore quorum to that
		// range. So in general, after this TODO, we will always leave this
		// method with *at least one* store initialized, but not necessarily
		// all. This is fine, since initializing additional stores later is
		// easy.
		clusterID, err := g.GetClusterID()
		if err != nil {
			return nil, err
		}
		s.inspectState.clusterID = clusterID
		return &initState{
			initDiskState: *s.inspectState,
			joined:        true,
			bootstrapped:  false,
		}, nil
	}
}
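
// exampleBootstrapFirstNode is an illustrative sketch only; it is not part of
// the production code path. It shows how an incoming Bootstrap request
// unblocks ServeAndWait on a node whose engines are all empty. In production
// the request arrives over gRPC (typically issued by `cockroach init`); here
// Bootstrap is invoked directly to keep the sketch self-contained.
func exampleBootstrapFirstNode(
	ctx context.Context,
	s *initServer,
	stopper *stop.Stopper,
	sv *settings.Values,
	g *gossip.Gossip,
) (*initState, error) {
	// Fire off the Bootstrap request asynchronously; ServeAndWait below will
	// pick up the resulting initState from s.bootstrapReqCh.
	if err := stopper.RunAsyncTask(ctx, "bootstrap", func(ctx context.Context) {
		if _, err := s.Bootstrap(ctx, &serverpb.BootstrapRequest{}); err != nil {
			log.Warningf(ctx, "bootstrap failed: %v", err)
		}
	}); err != nil {
		return nil, err
	}
	// ServeAndWait returns once Bootstrap has initialized the cluster on this
	// node's engines (or, alternatively, once Gossip connects to an already
	// initialized cluster).
	return s.ServeAndWait(ctx, stopper, sv, g)
}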

var errInternalBootstrapError = errors.New("unable to bootstrap due to internal error")

// Bootstrap implements the serverpb.Init service. Users set up a new
// CockroachDB cluster by calling this endpoint on *exactly one node* in the
// cluster (retrying only on that node).
// Attempting to bootstrap a node that was already bootstrapped will result in
// an error.
//
// NB: there is no protection against users erroneously bootstrapping multiple
// nodes. In that case, they end up with more than one cluster, and with nodes
// panicking or refusing to connect to each other.
func (s *initServer) Bootstrap(
	ctx context.Context, _ *serverpb.BootstrapRequest,
) (*serverpb.BootstrapResponse, error) {
	// Bootstrap() only responds once. Everyone else gets an error, either
	// ErrClusterInitialized (in the success case) or errInternalBootstrapError.

	s.mu.Lock()
	defer s.mu.Unlock()

	if s.mu.rejectErr != nil {
		return nil, s.mu.rejectErr
	}

	state, err := s.tryBootstrap(ctx)
	if err != nil {
		log.Errorf(ctx, "bootstrap: %v", err)
		s.mu.rejectErr = errInternalBootstrapError
		return nil, s.mu.rejectErr
	}
	s.mu.rejectErr = ErrClusterInitialized
	s.bootstrapReqCh <- state
	return &serverpb.BootstrapResponse{}, nil
}
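
// An illustrative client-side sketch (assumed usage, not something defined in
// this file): the Bootstrap RPC is normally issued by `cockroach init` against
// exactly one node. Assuming conn is a gRPC connection to that node, the call
// would look roughly like:
//
//	client := serverpb.NewInitClient(conn)
//	if _, err := client.Bootstrap(ctx, &serverpb.BootstrapRequest{}); err != nil {
//		// An "already initialized" error means a cluster exists on that node;
//		// anything else is a genuine failure worth retrying or surfacing.
//	}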

func (s *initServer) tryBootstrap(ctx context.Context) (*initState, error) {
	cv := clusterversion.ClusterVersion{Version: s.bootstrapVersion}
	if err := kvserver.WriteClusterVersionToEngines(ctx, s.inspectState.newEngines, cv); err != nil {
		return nil, err
	}
	return bootstrapCluster(
		ctx, s.inspectState.newEngines, s.bootstrapZoneConfig, s.bootstrapSystemZoneConfig,
	)
}

// DiskClusterVersion returns the cluster version synthesized from disk. This
// is always non-zero since it falls back to the BinaryMinSupportedVersion.
func (s *initServer) DiskClusterVersion() clusterversion.ClusterVersion {
	return s.inspectState.clusterVersion
}