github.com/ethereum-optimism/optimism@v1.7.2/op-node/p2p/discovery.go

package p2p

import (
	"bytes"
	"context"
	secureRand "crypto/rand"
	"encoding/binary"
	"fmt"
	"io"
	"math/rand"
	"net"
	"time"

	decredSecp "github.com/decred/dcrd/dcrec/secp256k1/v4"
	"github.com/libp2p/go-libp2p/core/crypto"
	"github.com/libp2p/go-libp2p/core/network"
	"github.com/libp2p/go-libp2p/core/peer"
	"github.com/multiformats/go-multiaddr"

	gcrypto "github.com/ethereum/go-ethereum/crypto"
	"github.com/ethereum/go-ethereum/log"
	"github.com/ethereum/go-ethereum/p2p/discover"
	"github.com/ethereum/go-ethereum/p2p/enode"
	"github.com/ethereum/go-ethereum/p2p/enr"
	"github.com/ethereum/go-ethereum/rlp"

	"github.com/btcsuite/btcd/blockchain"
	"github.com/btcsuite/btcd/chaincfg/chainhash"

	"github.com/ethereum-optimism/optimism/op-node/p2p/store"
	"github.com/ethereum-optimism/optimism/op-node/rollup"
)

// Force use of the new chainhash module, not the legacy chainhash package from the btcd module.
const _ = chainhash.HashSize

// Force use of the btcd module while the keycard dependency in geth still depends on it,
// so that go mod tidy does not clean up our explicit usage of v0.23.3, which resolves conflicts with the chainhash module.
const _ = blockchain.CoinbaseWitnessDataLen

const (
	discoverIntervalFast   = time.Second * 5
	discoverIntervalSlow   = time.Second * 20
	connectionIntervalFast = time.Second * 5
	connectionIntervalSlow = time.Second * 20
	connectionWorkerCount  = 4
	connectionBufferSize   = 10
	discoveredNodesBuffer  = 3
	tableKickoffDelay      = time.Second * 3
	discoveredAddrTTL      = time.Hour * 24
	collectiveDialTimeout  = time.Second * 30
)

func (conf *Config) Discovery(log log.Logger, rollupCfg *rollup.Config, tcpPort uint16) (*enode.LocalNode, *discover.UDPv5, error) {
	if conf.NoDiscovery {
		return nil, nil, nil
	}
	priv := (*decredSecp.PrivateKey)(conf.Priv).ToECDSA()
	// use the geth curve definition. Same crypto, but geth needs to detect it as *their* definition of the curve.
	priv.Curve = gcrypto.S256()
	localNode := enode.NewLocalNode(conf.DiscoveryDB, priv)
	if conf.AdvertiseIP != nil {
		localNode.SetStaticIP(conf.AdvertiseIP)
	}
	if conf.AdvertiseUDPPort != 0 { // explicitly advertised port gets priority
		localNode.SetFallbackUDP(int(conf.AdvertiseUDPPort))
	} else if conf.ListenUDPPort != 0 { // otherwise default to the port we configured it to listen on
		localNode.SetFallbackUDP(int(conf.ListenUDPPort))
	}
	if conf.AdvertiseTCPPort != 0 { // explicitly advertised port gets priority
		localNode.Set(enr.TCP(conf.AdvertiseTCPPort))
	} else if tcpPort != 0 { // otherwise try to pick up whatever port LibP2P bound to (listen port, or dynamically picked)
		localNode.Set(enr.TCP(tcpPort))
	} else if conf.ListenTCPPort != 0 { // otherwise default to the port we configured it to listen on
		localNode.Set(enr.TCP(conf.ListenTCPPort))
	} else {
		return nil, nil, fmt.Errorf("no TCP port to put in discovery record")
	}
	dat := OpStackENRData{
		chainID: rollupCfg.L2ChainID.Uint64(),
		version: 0,
	}
	localNode.Set(&dat)

	udpAddr := &net.UDPAddr{
		IP:   conf.ListenIP,
		Port: int(conf.ListenUDPPort),
	}

	conn, err := net.ListenUDP("udp", udpAddr)
	if err != nil {
		return nil, nil, err
	}
	if udpAddr.Port == 0 { // if we picked a port dynamically, then find the port we got, and update our node record
		localUDPAddr := conn.LocalAddr().(*net.UDPAddr)
		localNode.SetFallbackUDP(localUDPAddr.Port)
	}

	cfg := discover.Config{
		PrivateKey:   priv,
		NetRestrict:  conf.NetRestrict,
		Bootnodes:    conf.Bootnodes,
		Unhandled:    nil, // Not used in dv5
		Log:          log,
		ValidSchemes: enode.ValidSchemes,
	}
	udpV5, err := discover.ListenV5(conn, localNode, cfg)
	if err != nil {
		return nil, nil, err
	}

	log.Info("started discovery service", "enr", localNode.Node(), "id", localNode.ID())

	// TODO: periodically we can pull the external IP and TCP port from libp2p NAT service,
	// and add it as a statement to keep the localNode accurate (if we trust the NAT device more than the discv5 statements)

	return localNode, udpV5, nil
}
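
// A minimal sketch (assumed, not part of the upstream file): a record produced by
// Discovery above carries the usual "ip"/"tcp"/"udp"/"secp256k1" pairs plus the
// "opstack" entry, which a peer can read back like this:
func exampleReadOpStackEntry(node *enode.Node) (uint64, uint64, error) {
	var dat OpStackENRData
	if err := node.Load(&dat); err != nil {
		return 0, 0, fmt.Errorf("record has no valid opstack entry: %w", err)
	}
	return dat.chainID, dat.version, nil
}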

// Secp256k1 is like the geth Secp256k1 enr entry type, but using the libp2p pubkey representation instead
type Secp256k1 crypto.Secp256k1PublicKey

func (v Secp256k1) ENRKey() string { return "secp256k1" }

// EncodeRLP implements rlp.Encoder.
func (v Secp256k1) EncodeRLP(w io.Writer) error {
	return rlp.Encode(w, (*decredSecp.PublicKey)(&v).SerializeCompressed())
}

// DecodeRLP implements rlp.Decoder.
func (v *Secp256k1) DecodeRLP(s *rlp.Stream) error {
	buf, err := s.Bytes()
	if err != nil {
		return err
	}
	pk, err := decredSecp.ParsePubKey(buf)
	if err != nil {
		return err
	}
	*v = (Secp256k1)(*pk)
	return nil
}

func enrToAddrInfo(r *enode.Node) (*peer.AddrInfo, *crypto.Secp256k1PublicKey, error) {
	ip := r.IP()
	ipScheme := "ip4"
	if ip4 := ip.To4(); ip4 == nil {
		ipScheme = "ip6"
	} else {
		ip = ip4
	}
	mAddr, err := multiaddr.NewMultiaddr(fmt.Sprintf("/%s/%s/tcp/%d", ipScheme, ip.String(), r.TCP()))
	if err != nil {
		return nil, nil, fmt.Errorf("could not construct multi addr: %w", err)
	}
	var enrPub Secp256k1
	if err := r.Load(&enrPub); err != nil {
		return nil, nil, fmt.Errorf("failed to load pubkey as libp2p pubkey type from ENR")
	}
	pub := (*crypto.Secp256k1PublicKey)(&enrPub)
	peerID, err := peer.IDFromPublicKey(pub)
	if err != nil {
		return nil, pub, fmt.Errorf("could not compute peer ID from pubkey for multi-addr: %w", err)
	}
	return &peer.AddrInfo{
		ID:    peerID,
		Addrs: []multiaddr.Multiaddr{mAddr},
	}, pub, nil
}
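
// A minimal sketch (assumed, not part of the upstream file) of the multiaddr format
// produced by enrToAddrInfo: an IPv4 record with TCP port 9222 yields
// "/ip4/203.0.113.5/tcp/9222", while IPv6 addresses use the "ip6" scheme instead.
func exampleDiscoveredMultiaddr(ip net.IP, tcpPort int) (multiaddr.Multiaddr, error) {
	scheme := "ip6"
	if ip4 := ip.To4(); ip4 != nil {
		scheme, ip = "ip4", ip4
	}
	return multiaddr.NewMultiaddr(fmt.Sprintf("/%s/%s/tcp/%d", scheme, ip.String(), tcpPort))
}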

// The discovery ENRs are just key-value lists, and we filter them by records tagged with the "opstack" key,
// and then check the chain ID and version.
type OpStackENRData struct {
	chainID uint64
	version uint64
}

func (o *OpStackENRData) ENRKey() string {
	return "opstack"
}

func (o *OpStackENRData) EncodeRLP(w io.Writer) error {
	out := make([]byte, 2*binary.MaxVarintLen64)
	offset := binary.PutUvarint(out, o.chainID)
	offset += binary.PutUvarint(out[offset:], o.version)
	out = out[:offset]
	// encode as byte-string
	return rlp.Encode(w, out)
}

func (o *OpStackENRData) DecodeRLP(s *rlp.Stream) error {
	b, err := s.Bytes()
	if err != nil {
		return fmt.Errorf("failed to decode outer ENR entry: %w", err)
	}
	// We don't check the byte length: the below readers are limited, and the ENR itself has size limits.
	// Future "opstack" entries may contain additional data, and will be tagged with a newer version etc.
	r := bytes.NewReader(b)
	chainID, err := binary.ReadUvarint(r)
	if err != nil {
		return fmt.Errorf("failed to read chain ID var int: %w", err)
	}
	version, err := binary.ReadUvarint(r)
	if err != nil {
		return fmt.Errorf("failed to read version var int: %w", err)
	}
	o.chainID = chainID
	o.version = version
	return nil
}

var _ enr.Entry = (*OpStackENRData)(nil)
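
// A minimal round-trip sketch (assumed, not part of the upstream file): the two
// uvarints are concatenated and wrapped in a single RLP byte-string, so chain ID 10
// with version 0 encodes as the RLP string 0x82 0x0a 0x00.
func exampleOpStackENRRoundTrip() (*OpStackENRData, error) {
	var buf bytes.Buffer
	if err := (&OpStackENRData{chainID: 10, version: 0}).EncodeRLP(&buf); err != nil {
		return nil, err
	}
	var decoded OpStackENRData
	// 64 bytes is a generous input limit for this tiny entry.
	if err := decoded.DecodeRLP(rlp.NewStream(&buf, 64)); err != nil {
		return nil, err
	}
	return &decoded, nil // decoded.chainID == 10, decoded.version == 0
}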

func FilterEnodes(log log.Logger, cfg *rollup.Config) func(node *enode.Node) bool {
	return func(node *enode.Node) bool {
		var dat OpStackENRData
		err := node.Load(&dat)
		// if the entry does not exist, or if it is invalid, then ignore the node
		if err != nil {
			log.Trace("discovered node record has no opstack info", "node", node.ID(), "err", err)
			return false
		}
		// check chain ID matches
		if cfg.L2ChainID.Uint64() != dat.chainID {
			log.Trace("discovered node record has no matching chain ID", "node", node.ID(), "got", dat.chainID, "expected", cfg.L2ChainID.Uint64())
			return false
		}
		// check version matches
		if dat.version != 0 {
			log.Trace("discovered node record has no matching version", "node", node.ID(), "got", dat.version, "expected", 0)
			return false
		}
		return true
	}
}
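
// A minimal usage sketch (assumed, not part of the upstream file): the predicate is
// meant to wrap a discv5 iterator so that only records with a matching "opstack"
// entry are yielded, which is exactly how DiscoveryProcess below consumes it.
func exampleFilteredIterator(log log.Logger, cfg *rollup.Config, udpV5 *discover.UDPv5) enode.Iterator {
	return enode.Filter(udpV5.RandomNodes(), FilterEnodes(log, cfg))
}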

// DiscoveryProcess runs a discovery process that randomly walks the DHT to fill the peerstore,
// and connects to nodes in the peerstore that we are not already connected to.
// Nodes from the peerstore will be shuffled, unsuccessful connection attempts will cause peers to be avoided,
// and only nodes with addresses (under TTL) will be connected to.
func (n *NodeP2P) DiscoveryProcess(ctx context.Context, log log.Logger, cfg *rollup.Config, connectGoal uint) {
	if n.dv5Udp == nil {
		log.Warn("peer discovery is disabled")
		return
	}
	filter := FilterEnodes(log, cfg)
	// We pull nodes from discv5 DHT in random order to find new peers.
	// Eventually we'll find a peer record that matches our filter.
	randomNodeIter := n.dv5Udp.RandomNodes()

	randomNodeIter = enode.Filter(randomNodeIter, filter)
	defer randomNodeIter.Close()

	// We pull from the DHT in a slow/fast interval, depending on the need to find more peers
	discoverTicker := time.NewTicker(discoverIntervalFast)
	defer discoverTicker.Stop()

	// We connect to the peers we know of to maintain a target,
	// but do so with polling to avoid scanning the connection count continuously
	connectTicker := time.NewTicker(connectionIntervalFast)
	defer connectTicker.Stop()

	// We can go faster/slower depending on the need
	slower := func() {
		discoverTicker.Reset(discoverIntervalSlow)
		connectTicker.Reset(connectionIntervalSlow)
	}
	faster := func() {
		discoverTicker.Reset(discoverIntervalFast)
		connectTicker.Reset(connectionIntervalFast)
	}

	// We try to connect to peers in parallel: some may be slow to respond
	connAttempts := make(chan peer.ID, connectionBufferSize)
	connectWorker := func(ctx context.Context) {
		for {
			id, ok := <-connAttempts
			if !ok {
				return
			}
			addrs := n.Host().Peerstore().Addrs(id)
			log.Info("attempting connection", "peer", id)
			ctx, cancel := context.WithTimeout(ctx, time.Second*10)
			err := n.Host().Connect(ctx, peer.AddrInfo{ID: id, Addrs: addrs})
			cancel()
			if err != nil {
				log.Debug("failed connection attempt", "peer", id, "err", err)
			}
		}
	}

	// stops all the workers when we are done
	defer close(connAttempts)
	// start workers to try to connect to peers
	for i := 0; i < connectionWorkerCount; i++ {
		go connectWorker(ctx)
	}

	// buffer discovered nodes, so we don't stall on the DHT iteration as much
	randomNodesCh := make(chan *enode.Node, discoveredNodesBuffer)
	defer close(randomNodesCh)
	bufferNodes := func() {
		for {
			select {
			case <-discoverTicker.C:
				if !randomNodeIter.Next() {
					log.Info("discv5 DHT iteration stopped, closing peer discovery now...")
					return
				}
				found := randomNodeIter.Node()
				select {
				// block once we have found enough nodes
				case randomNodesCh <- found:
					continue
				case <-ctx.Done():
					return
				}
			case <-ctx.Done():
				return
			}
		}
	}
	// Walk the DHT in parallel; the discv5 interface does not use channels for the iteration
	go bufferNodes()

	// Kick off by trying the nodes we have in our table (previous nodes from last run and/or bootnodes)
	go func() {
		<-time.After(tableKickoffDelay)
		// At the start we might have trouble walking the DHT,
		// but we do have a table with some nodes,
		// so take the table and feed it into the discovery process
		for _, rec := range n.dv5Udp.AllNodes() {
			if filter(rec) {
				select {
				case randomNodesCh <- rec:
					continue
				case <-ctx.Done():
					return
				}
			}
		}
	}()

	pstore := n.Host().Peerstore()
	for {
		select {
		case <-ctx.Done():
			log.Info("stopped peer discovery")
			return // no ctx error, expected close
		case found := <-randomNodesCh:
			var dat OpStackENRData
			if err := found.Load(&dat); err != nil { // we already filtered on chain ID and version
				continue
			}
			info, pub, err := enrToAddrInfo(found)
			if err != nil {
				continue
			}

			// record metadata to the peerstore if it is an extended peerstore
			if eps, ok := pstore.(store.ExtendedPeerstore); ok {
				_, err := eps.SetPeerMetadata(info.ID, store.PeerMetadata{
					ENR:       found.String(),
					OPStackID: dat.chainID,
				})
				if err != nil {
					log.Warn("failed to set peer metadata", "peer", info.ID, "err", err)
				}
			}
			// We add the addresses to the peerstore, and update the address TTL.
			// After that we stop using the address, assuming it may not be valid anymore (until we rediscover the node)
			pstore.AddAddrs(info.ID, info.Addrs, discoveredAddrTTL)
			_ = pstore.AddPubKey(info.ID, pub)

			// Tag the peer; we'd rather have the connection manager prune away old peers,
			// or peers on different chains, or anyone we have not seen via discovery.
			// There is no tag score decay yet, so just set it to 42.
			n.ConnectionManager().TagPeer(info.ID, fmt.Sprintf("opstack-%d-%d", dat.chainID, dat.version), 42)
			log.Debug("discovered peer", "peer", info.ID, "nodeID", found.ID(), "addr", info.Addrs[0])
		case <-connectTicker.C:
			connected := n.Host().Network().Peers()
			log.Debug("peering tick", "connected", len(connected),
				"advertised_udp", n.dv5Local.Node().UDP(),
				"advertised_tcp", n.dv5Local.Node().TCP(),
				"advertised_ip", n.dv5Local.Node().IP())
			if uint(len(connected)) < connectGoal {
				// Start looking for more peers more actively again
				faster()

				peersWithAddrs := n.Host().Peerstore().PeersWithAddrs()
				if err := shufflePeers(peersWithAddrs); err != nil {
					continue
				}

				existing := make(map[peer.ID]struct{})
				for _, p := range connected {
					existing[p] = struct{}{}
				}

				// Keep using these peers, and don't try new discovery/connections.
				// We don't need to search for more peers and try new connections if we already have plenty
				ctx, cancel := context.WithTimeout(ctx, collectiveDialTimeout)
			peerLoop:
				for _, id := range peersWithAddrs {
					// never dial ourselves
					if n.Host().ID() == id {
						continue
					}
					// skip peers that we are already connected to
					if _, ok := existing[id]; ok {
						continue
					}
					// skip peers that we recently failed to connect to
					if n.Host().Network().Connectedness(id) == network.CannotConnect {
						continue
					}
					// schedule, if there is still space to schedule (this may block)
					select {
					case connAttempts <- id:
					case <-ctx.Done():
						break peerLoop
					}
				}
				cancel()
			} else {
				// we have enough connections, slow down actively filling the peerstore
				slower()
			}
		}
	}
}

// shuffle the slice of peer IDs in-place with an RNG seeded by secure randomness.
func shufflePeers(ids peer.IDSlice) error {
	var x [8]byte // shuffling is not critical, just need to avoid basic predictability by outside peers
	if _, err := io.ReadFull(secureRand.Reader, x[:]); err != nil {
		return err
	}
	rng := rand.New(rand.NewSource(int64(binary.LittleEndian.Uint64(x[:]))))
	rng.Shuffle(len(ids), ids.Swap)
	return nil
}
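
// A minimal usage sketch (assumed, not part of the upstream file): shuffle the known
// peers and drop our own ID before queueing dial attempts, mirroring the candidate
// selection in DiscoveryProcess above.
func exampleDialCandidates(self peer.ID, known peer.IDSlice) (peer.IDSlice, error) {
	if err := shufflePeers(known); err != nil {
		return nil, err
	}
	out := make(peer.IDSlice, 0, len(known))
	for _, id := range known {
		if id == self { // never dial ourselves
			continue
		}
		out = append(out, id)
	}
	return out, nil
}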