github.com/grafana/pyroscope@v1.18.0/pkg/metastore/raftnode/node_bootstrap.go (about)

     1  package raftnode
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"fmt"
     7  	"slices"
     8  	"strings"
     9  	"time"
    10  
    11  	"github.com/go-kit/log"
    12  	"github.com/go-kit/log/level"
    13  	"github.com/grafana/dskit/backoff"
    14  	"github.com/grafana/dskit/dns"
    15  	"github.com/hashicorp/raft"
    16  
    17  	"github.com/grafana/pyroscope/pkg/metastore/discovery"
    18  	"github.com/grafana/pyroscope/pkg/metastore/raftnode/raftnodepb"
    19  )
    20  
    21  func (n *Node) bootstrap() error {
    22  	peers, err := n.bootstrapPeersWithRetries()
    23  	if err != nil {
    24  		return fmt.Errorf("failed to resolve peers: %w", err)
    25  	}
    26  	logger := log.With(n.logger,
    27  		"server_id", n.config.ServerID,
    28  		"advertise_address", n.config.AdvertiseAddress,
    29  		"peers", fmt.Sprint(peers))
    30  	lastPeer := peers[len(peers)-1]
    31  	if raft.ServerAddress(n.config.AdvertiseAddress) != lastPeer.Address {
    32  		level.Info(logger).Log("msg", "not the bootstrap node, skipping")
    33  		return nil
    34  	}
    35  	level.Info(logger).Log("msg", "bootstrapping raft")
    36  	bootstrap := n.raft.BootstrapCluster(raft.Configuration{Servers: peers})
    37  	if bootstrapErr := bootstrap.Error(); bootstrapErr != nil {
    38  		if !errors.Is(bootstrapErr, raft.ErrCantBootstrap) {
    39  			return fmt.Errorf("failed to bootstrap raft: %w", bootstrapErr)
    40  		}
    41  	}
    42  	return nil
    43  }
    44  
    45  func (n *Node) bootstrapPeersWithRetries() (peers []raft.Server, err error) {
    46  	prov := dns.NewProvider(n.logger, n.reg, dns.MiekgdnsResolverType)
    47  	attempt := func() bool {
    48  		peers, err = n.bootstrapPeers(prov)
    49  		level.Debug(n.logger).Log("msg", "resolving bootstrap peers", "peers", fmt.Sprint(peers), "err", err)
    50  		if err != nil {
    51  			_ = level.Error(n.logger).Log("msg", "failed to resolve bootstrap peers", "err", err)
    52  			return false
    53  		}
    54  		return true
    55  	}
    56  	backoffConfig := backoff.Config{
    57  		MinBackoff: 1 * time.Second,
    58  		MaxBackoff: 10 * time.Second,
    59  		MaxRetries: 20,
    60  	}
    61  	backOff := backoff.New(context.Background(), backoffConfig)
    62  	for backOff.Ongoing() {
    63  		if !attempt() {
    64  			backOff.Wait()
    65  		} else {
    66  			return peers, nil
    67  		}
    68  	}
    69  	return nil, fmt.Errorf("failed to resolve bootstrap peers after %d retries %w", backOff.NumRetries(), err)
    70  }
    71  
    72  const autoJoinTimeout = 10 * time.Second
    73  
    74  func (n *Node) tryAutoJoin() error {
    75  	// we can only auto-join if there is a real raft cluster running
    76  	ctx, cancel := context.WithTimeout(context.Background(), autoJoinTimeout)
    77  	defer cancel()
    78  
    79  	readIndexResp, err := n.raftNodeClient.ReadIndex(ctx, &raftnodepb.ReadIndexRequest{})
    80  	if err != nil {
    81  		return fmt.Errorf("failed to get current term for auto-join: %w", err)
    82  	}
    83  
    84  	logger := log.With(n.logger,
    85  		"server_id", n.config.ServerID,
    86  		"advertise_address", n.config.AdvertiseAddress)
    87  
    88  	// try to join the cluster via the leader
    89  	level.Info(logger).Log("msg", "attempting to join existing cluster", "current_term", readIndexResp.Term)
    90  	_, err = n.raftNodeClient.AddNode(ctx, &raftnodepb.AddNodeRequest{
    91  		ServerId:    n.config.AdvertiseAddress,
    92  		CurrentTerm: readIndexResp.Term,
    93  	})
    94  
    95  	if err != nil {
    96  		return fmt.Errorf("failed to auto-join cluster: %w", err)
    97  	}
    98  
    99  	return nil
   100  }
   101  
   102  func (n *Node) bootstrapPeers(prov *dns.Provider) ([]raft.Server, error) {
   103  	// The peer list always includes the local node.
   104  	peers := make([]raft.Server, 0, len(n.config.BootstrapPeers)+1)
   105  	peers = append(peers, raft.Server{
   106  		Suffrage: raft.Voter,
   107  		ID:       raft.ServerID(n.config.ServerID),
   108  		Address:  raft.ServerAddress(n.config.AdvertiseAddress),
   109  	})
   110  	// Note that raft requires stable node IDs, therefore we're using
   111  	// the node FQDN:port for both purposes: as the identifier and as the
   112  	// address. This requires a DNS SRV record lookup without further
   113  	// resolution of A records (dnssrvnoa+).
   114  	//
   115  	// Alternatively, peers may be specified explicitly in the
   116  	// "{addr}</{node_id}>" format, where the node is the optional node
   117  	// identifier.
   118  	var resolve []string
   119  	for _, peer := range n.config.BootstrapPeers {
   120  		if strings.Contains(peer, "+") {
   121  			resolve = append(resolve, peer)
   122  		} else {
   123  			peers = append(peers, discovery.ParsePeer(peer))
   124  		}
   125  	}
   126  	if len(resolve) > 0 {
   127  		ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
   128  		defer cancel()
   129  		if err := prov.Resolve(ctx, resolve); err != nil {
   130  			return nil, fmt.Errorf("failed to resolve bootstrap peers: %w", err)
   131  		}
   132  		resolvedPeers := prov.Addresses()
   133  		if len(resolvedPeers) == 0 {
   134  			// The local node is the only one in the cluster, but peers
   135  			// are expected to be present. Stop here to avoid bootstrapping
   136  			// a single-node cluster.
   137  			return nil, fmt.Errorf("bootstrap peers can't be resolved")
   138  		}
   139  		for _, peer := range resolvedPeers {
   140  			peers = append(peers, raft.Server{
   141  				Suffrage: raft.Voter,
   142  				ID:       raft.ServerID(peer),
   143  				Address:  raft.ServerAddress(peer),
   144  			})
   145  		}
   146  	}
   147  	// Finally, we sort and deduplicate the peers: the first one
   148  	// is to boostrap the cluster. If there are nodes with distinct
   149  	// IDs but the same address, bootstrapping will fail.
   150  	slices.SortFunc(peers, func(a, b raft.Server) int {
   151  		return strings.Compare(string(a.ID), string(b.ID))
   152  	})
   153  	peers = slices.CompactFunc(peers, func(a, b raft.Server) bool {
   154  		return a.ID == b.ID
   155  	})
   156  	if len(peers) != n.config.BootstrapExpectPeers {
   157  		return nil, fmt.Errorf("expected number of bootstrap peers not reached: got %d, expected %d\n%+v",
   158  			len(peers), n.config.BootstrapExpectPeers, peers)
   159  	}
   160  	return peers, nil
   161  }