github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/cli/init.go (about)

     1  // Copyright 2017 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package cli
    12  
    13  import (
    14  	"context"
    15  	"fmt"
    16  	"os"
    17  	"strings"
    18  	"time"
    19  
    20  	"github.com/cockroachdb/cockroach/pkg/server"
    21  	"github.com/cockroachdb/cockroach/pkg/server/serverpb"
    22  	"github.com/cockroachdb/cockroach/pkg/util/contextutil"
    23  	"github.com/cockroachdb/cockroach/pkg/util/retry"
    24  	"github.com/cockroachdb/errors"
    25  	"github.com/spf13/cobra"
    26  	"google.golang.org/grpc"
    27  )
    28  
    29  var initCmd = &cobra.Command{
    30  	Use:   "init",
    31  	Short: "initialize a cluster",
    32  	Long: `
    33  Perform one-time-only initialization of a CockroachDB cluster.
    34  
    35  After starting one or more nodes with --join flags, run the init
    36  command on one node (passing the same --host and certificate flags
    37  you would use for the sql command). The target of the init command
    38  must appear in the --join flags of other nodes.
    39  
    40  A node started without the --join flag initializes itself as a
    41  single-node cluster, so the init command is not used in that case.
    42  `,
    43  	Args: cobra.NoArgs,
    44  	RunE: maybeShoutError(MaybeDecorateGRPCError(runInit)),
    45  }
    46  
    47  func runInit(cmd *cobra.Command, args []string) error {
    48  	ctx, cancel := context.WithCancel(context.Background())
    49  	defer cancel()
    50  
    51  	// Wait for the node to be ready for initialization.
    52  	conn, finish, err := waitForClientReadinessAndGetClientGRPCConn(ctx)
    53  	if err != nil {
    54  		return err
    55  	}
    56  	defer finish()
    57  
    58  	// Actually perform cluster initialization.
    59  	c := serverpb.NewInitClient(conn)
    60  
    61  	if _, err = c.Bootstrap(ctx, &serverpb.BootstrapRequest{}); err != nil {
    62  		if strings.Contains(err.Error(), server.ErrClusterInitialized.Error()) {
    63  			// We really want to use errors.Is() here but this would require
    64  			// error serialization support in gRPC.
    65  			// This is not yet performed in CockroachDB even though
    66  			// the error library now has infrastructure to do so, see:
    67  			// https://github.com/cockroachdb/errors/pull/14
    68  			return errors.WithHint(err,
    69  				"Please ensure all your start commands are using --join.")
    70  		}
    71  		return err
    72  	}
    73  
    74  	fmt.Fprintln(os.Stdout, "Cluster successfully initialized")
    75  	return nil
    76  }
    77  
    78  // waitForClientReadinessAndGetClientGRPCConn waits for the node to
    79  // be ready for initialization. This check ensures that the `init`
    80  // command is less likely to fail because it was issued too
    81  // early. In general, retrying the `init` command is dangerous [0],
    82  // so we make a best effort at minimizing chances for users to
    83  // arrive in an uncomfortable situation.
    84  //
    85  // [0]: https://github.com/cockroachdb/cockroach/pull/19753#issuecomment-341561452
    86  func waitForClientReadinessAndGetClientGRPCConn(
    87  	ctx context.Context,
    88  ) (conn *grpc.ClientConn, finish func(), err error) {
    89  	defer func() {
    90  		// If we're returning with an error, tear down the gRPC connection
    91  		// that's been established, if any.
    92  		if finish != nil && err != nil {
    93  			finish()
    94  		}
    95  	}()
    96  
    97  	retryOpts := retry.Options{InitialBackoff: time.Second, MaxBackoff: time.Second}
    98  	for r := retry.StartWithCtx(ctx, retryOpts); r.Next(); {
    99  		if err = contextutil.RunWithTimeout(ctx, "init-open-conn", 5*time.Second,
   100  			func(ctx context.Context) error {
   101  				// (Attempt to) establish the gRPC connection. If that fails,
   102  				// it may be that the server hasn't started to listen yet, in
   103  				// which case we'll retry.
   104  				conn, _, finish, err = getClientGRPCConn(ctx, serverCfg)
   105  				if err != nil {
   106  					return err
   107  				}
   108  
   109  				// Access the /health endpoint. Until/unless this succeeds, the
   110  				// node is not yet fully initialized and ready to accept
   111  				// Bootstrap requests.
   112  				ac := serverpb.NewAdminClient(conn)
   113  				_, err := ac.Health(ctx, &serverpb.HealthRequest{})
   114  				return err
   115  			}); err != nil {
   116  			err = errors.Wrapf(err, "node not ready to perform cluster initialization")
   117  			fmt.Fprintln(stderr, "warning:", err, "(retrying)")
   118  
   119  			// We're going to retry; first cancel the connection that's
   120  			// been established, if any.
   121  			if finish != nil {
   122  				finish()
   123  				finish = nil
   124  			}
   125  			// Then retry.
   126  			continue
   127  		}
   128  
   129  		// No error - connection was established and health endpoint is
   130  		// ready.
   131  		return conn, finish, err
   132  	}
   133  	err = errors.New("maximum number of retries exceeded")
   134  	return
   135  }