github.com/ethereum-optimism/optimism@v1.7.2/op-node/p2p/discovery.go

package p2p

import (
	"bytes"
	"context"
	secureRand "crypto/rand"
	"encoding/binary"
	"fmt"
	"io"
	"math/rand"
	"net"
	"time"

	decredSecp "github.com/decred/dcrd/dcrec/secp256k1/v4"
	"github.com/libp2p/go-libp2p/core/crypto"
	"github.com/libp2p/go-libp2p/core/network"
	"github.com/libp2p/go-libp2p/core/peer"
	"github.com/multiformats/go-multiaddr"

	gcrypto "github.com/ethereum/go-ethereum/crypto"
	"github.com/ethereum/go-ethereum/log"
	"github.com/ethereum/go-ethereum/p2p/discover"
	"github.com/ethereum/go-ethereum/p2p/enode"
	"github.com/ethereum/go-ethereum/p2p/enr"
	"github.com/ethereum/go-ethereum/rlp"

	"github.com/btcsuite/btcd/blockchain"
	"github.com/btcsuite/btcd/chaincfg/chainhash"

	"github.com/ethereum-optimism/optimism/op-node/p2p/store"
	"github.com/ethereum-optimism/optimism/op-node/rollup"
)

// Force usage of the new chainhash module, and not the legacy chainhash package of the btcd module.
const _ = chainhash.HashSize

// Force usage of the btcd module while the keycard dependency in geth still depends on it,
// so that go mod tidy does not clean up our explicit usage of v0.23.3, which resolves conflicts with the chainhash module.
const _ = blockchain.CoinbaseWitnessDataLen

const (
	discoverIntervalFast   = time.Second * 5
	discoverIntervalSlow   = time.Second * 20
	connectionIntervalFast = time.Second * 5
	connectionIntervalSlow = time.Second * 20
	connectionWorkerCount  = 4
	connectionBufferSize   = 10
	discoveredNodesBuffer  = 3
	tableKickoffDelay      = time.Second * 3
	discoveredAddrTTL      = time.Hour * 24
	collectiveDialTimeout  = time.Second * 30
)

func (conf *Config) Discovery(log log.Logger, rollupCfg *rollup.Config, tcpPort uint16) (*enode.LocalNode, *discover.UDPv5, error) {
	if conf.NoDiscovery {
		return nil, nil, nil
	}
	priv := (*decredSecp.PrivateKey)(conf.Priv).ToECDSA()
	// use the geth curve definition. Same crypto, but geth needs to detect it as *their* definition of the curve.
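	// The key bytes themselves are unchanged by this; only the Curve field is replaced,
	// so that go-ethereum's enode/discv5 code recognizes the key as its own secp256k1 curve type.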
	priv.Curve = gcrypto.S256()
	localNode := enode.NewLocalNode(conf.DiscoveryDB, priv)
	if conf.AdvertiseIP != nil {
		localNode.SetStaticIP(conf.AdvertiseIP)
	}
	if conf.AdvertiseUDPPort != 0 { // explicitly advertised port gets priority
		localNode.SetFallbackUDP(int(conf.AdvertiseUDPPort))
	} else if conf.ListenUDPPort != 0 { // otherwise default to the port we configured it to listen on
		localNode.SetFallbackUDP(int(conf.ListenUDPPort))
	}
	if conf.AdvertiseTCPPort != 0 { // explicitly advertised port gets priority
		localNode.Set(enr.TCP(conf.AdvertiseTCPPort))
	} else if tcpPort != 0 { // otherwise try to pick up whatever port LibP2P bound to (listen port, or dynamically picked)
		localNode.Set(enr.TCP(tcpPort))
	} else if conf.ListenTCPPort != 0 { // otherwise default to the port we configured it to listen on
		localNode.Set(enr.TCP(conf.ListenTCPPort))
	} else {
		return nil, nil, fmt.Errorf("no TCP port to put in discovery record")
	}
	dat := OpStackENRData{
		chainID: rollupCfg.L2ChainID.Uint64(),
		version: 0,
	}
	localNode.Set(&dat)

	udpAddr := &net.UDPAddr{
		IP:   conf.ListenIP,
		Port: int(conf.ListenUDPPort),
	}

	conn, err := net.ListenUDP("udp", udpAddr)
	if err != nil {
		return nil, nil, err
	}
	if udpAddr.Port == 0 { // if we picked a port dynamically, then find the port we got, and update our node record
		localUDPAddr := conn.LocalAddr().(*net.UDPAddr)
		localNode.SetFallbackUDP(localUDPAddr.Port)
	}

	cfg := discover.Config{
		PrivateKey:   priv,
		NetRestrict:  conf.NetRestrict,
		Bootnodes:    conf.Bootnodes,
		Unhandled:    nil, // Not used in dv5
		Log:          log,
		ValidSchemes: enode.ValidSchemes,
	}
	udpV5, err := discover.ListenV5(conn, localNode, cfg)
	if err != nil {
		return nil, nil, err
	}

	log.Info("started discovery service", "enr", localNode.Node(), "id", localNode.ID())

	// TODO: periodically we can pull the external IP and TCP port from the libp2p NAT service,
	// and add it as a statement to keep the localNode accurate (if we trust the NAT device more than the discv5 statements)

	return localNode, udpV5, nil
}

// Secp256k1 is like the geth Secp256k1 enr entry type, but using the libp2p pubkey representation instead
type Secp256k1 crypto.Secp256k1PublicKey

func (v Secp256k1) ENRKey() string { return "secp256k1" }

// EncodeRLP implements rlp.Encoder.
func (v Secp256k1) EncodeRLP(w io.Writer) error {
	return rlp.Encode(w, (*decredSecp.PublicKey)(&v).SerializeCompressed())
}

// DecodeRLP implements rlp.Decoder.
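// The payload mirrors EncodeRLP above: an RLP byte-string holding the compressed
// (33-byte) secp256k1 public key.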
func (v *Secp256k1) DecodeRLP(s *rlp.Stream) error {
	buf, err := s.Bytes()
	if err != nil {
		return err
	}
	pk, err := decredSecp.ParsePubKey(buf)
	if err != nil {
		return err
	}
	*v = (Secp256k1)(*pk)
	return nil
}

func enrToAddrInfo(r *enode.Node) (*peer.AddrInfo, *crypto.Secp256k1PublicKey, error) {
	ip := r.IP()
	ipScheme := "ip4"
	if ip4 := ip.To4(); ip4 == nil {
		ipScheme = "ip6"
	} else {
		ip = ip4
	}
	mAddr, err := multiaddr.NewMultiaddr(fmt.Sprintf("/%s/%s/tcp/%d", ipScheme, ip.String(), r.TCP()))
	if err != nil {
		return nil, nil, fmt.Errorf("could not construct multi addr: %w", err)
	}
	var enrPub Secp256k1
	if err := r.Load(&enrPub); err != nil {
		return nil, nil, fmt.Errorf("failed to load pubkey as libp2p pubkey type from ENR")
	}
	pub := (*crypto.Secp256k1PublicKey)(&enrPub)
	peerID, err := peer.IDFromPublicKey(pub)
	if err != nil {
		return nil, pub, fmt.Errorf("could not compute peer ID from pubkey for multi-addr: %w", err)
	}
	return &peer.AddrInfo{
		ID:    peerID,
		Addrs: []multiaddr.Multiaddr{mAddr},
	}, pub, nil
}

// The discovery ENRs are just key-value lists, and we filter them by records tagged with the "opstack" key,
// and then check the chain ID and version.
type OpStackENRData struct {
	chainID uint64
	version uint64
}

func (o *OpStackENRData) ENRKey() string {
	return "opstack"
}

func (o *OpStackENRData) EncodeRLP(w io.Writer) error {
	out := make([]byte, 2*binary.MaxVarintLen64)
	offset := binary.PutUvarint(out, o.chainID)
	offset += binary.PutUvarint(out[offset:], o.version)
	out = out[:offset]
	// encode as byte-string
	return rlp.Encode(w, out)
}

func (o *OpStackENRData) DecodeRLP(s *rlp.Stream) error {
	b, err := s.Bytes()
	if err != nil {
		return fmt.Errorf("failed to decode outer ENR entry: %w", err)
	}
	// We don't check the byte length: the below readers are limited, and the ENR itself has size limits.
	// Future "opstack" entries may contain additional data, and will be tagged with a newer version etc.
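	// For example, a chain ID of 10 with version 0 arrives as the two uvarint bytes
	// 0x0a 0x00 (wrapped by the ENR codec as the RLP byte-string 0x82 0x0a 0x00).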
	r := bytes.NewReader(b)
	chainID, err := binary.ReadUvarint(r)
	if err != nil {
		return fmt.Errorf("failed to read chain ID var int: %w", err)
	}
	version, err := binary.ReadUvarint(r)
	if err != nil {
		return fmt.Errorf("failed to read version var int: %w", err)
	}
	o.chainID = chainID
	o.version = version
	return nil
}

var _ enr.Entry = (*OpStackENRData)(nil)

func FilterEnodes(log log.Logger, cfg *rollup.Config) func(node *enode.Node) bool {
	return func(node *enode.Node) bool {
		var dat OpStackENRData
		err := node.Load(&dat)
		// if the entry does not exist, or if it is invalid, then ignore the node
		if err != nil {
			log.Trace("discovered node record has no opstack info", "node", node.ID(), "err", err)
			return false
		}
		// check chain ID matches
		if cfg.L2ChainID.Uint64() != dat.chainID {
			log.Trace("discovered node record has no matching chain ID", "node", node.ID(), "got", dat.chainID, "expected", cfg.L2ChainID.Uint64())
			return false
		}
		// check version matches
		if dat.version != 0 {
			log.Trace("discovered node record has no matching version", "node", node.ID(), "got", dat.version, "expected", 0)
			return false
		}
		return true
	}
}

// DiscoveryProcess runs a discovery process that randomly walks the DHT to fill the peerstore,
// and connects to nodes in the peerstore that we are not already connected to.
// Nodes from the peerstore will be shuffled, unsuccessful connection attempts will cause peers to be avoided,
// and only nodes with addresses (under TTL) will be connected to.
func (n *NodeP2P) DiscoveryProcess(ctx context.Context, log log.Logger, cfg *rollup.Config, connectGoal uint) {
	if n.dv5Udp == nil {
		log.Warn("peer discovery is disabled")
		return
	}
	filter := FilterEnodes(log, cfg)
	// We pull nodes from the discv5 DHT in random order to find new peers.
	// Eventually we'll find a peer record that matches our filter.
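	// RandomNodes returns a lazy iterator; wrapping it with enode.Filter below means each call to
	// Next keeps advancing until it yields a record that passes the opstack filter (or the iterator closes).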
	randomNodeIter := n.dv5Udp.RandomNodes()

	randomNodeIter = enode.Filter(randomNodeIter, filter)
	defer randomNodeIter.Close()

	// We pull from the DHT in a slow/fast interval, depending on the need to find more peers
	discoverTicker := time.NewTicker(discoverIntervalFast)
	defer discoverTicker.Stop()

	// We connect to the peers we know of to maintain a target,
	// but do so with polling to avoid scanning the connection count continuously
	connectTicker := time.NewTicker(connectionIntervalFast)
	defer connectTicker.Stop()

	// We can go faster/slower depending on the need
	slower := func() {
		discoverTicker.Reset(discoverIntervalSlow)
		connectTicker.Reset(connectionIntervalSlow)
	}
	faster := func() {
		discoverTicker.Reset(discoverIntervalFast)
		connectTicker.Reset(connectionIntervalFast)
	}

	// We try to connect to peers in parallel: some may be slow to respond
	connAttempts := make(chan peer.ID, connectionBufferSize)
	connectWorker := func(ctx context.Context) {
		for {
			id, ok := <-connAttempts
			if !ok {
				return
			}
			addrs := n.Host().Peerstore().Addrs(id)
			log.Info("attempting connection", "peer", id)
			ctx, cancel := context.WithTimeout(ctx, time.Second*10)
			err := n.Host().Connect(ctx, peer.AddrInfo{ID: id, Addrs: addrs})
			cancel()
			if err != nil {
				log.Debug("failed connection attempt", "peer", id, "err", err)
			}
		}
	}

	// stops all the workers when we are done
	defer close(connAttempts)
	// start workers to try to connect to peers
	for i := 0; i < connectionWorkerCount; i++ {
		go connectWorker(ctx)
	}

	// buffer discovered nodes, so we don't stall on the DHT iteration as much
	randomNodesCh := make(chan *enode.Node, discoveredNodesBuffer)
	defer close(randomNodesCh)
	bufferNodes := func() {
		for {
			select {
			case <-discoverTicker.C:
				if !randomNodeIter.Next() {
					log.Info("discv5 DHT iteration stopped, closing peer discovery now...")
					return
				}
				found := randomNodeIter.Node()
				select {
				// block once we have found enough nodes
				case randomNodesCh <- found:
					continue
				case <-ctx.Done():
					return
				}
			case <-ctx.Done():
				return
			}
		}
	}
	// Walk the DHT in parallel; the discv5 interface does not use channels for the iteration
	go bufferNodes()

	// Kick off by trying the nodes we have in our table (previous nodes from last run and/or bootnodes)
	go func() {
		<-time.After(tableKickoffDelay)
		// At the start we might have trouble walking the DHT,
		// but we do have a table with some nodes,
		// so take the table and feed it into the discovery process
		for _, rec := range n.dv5Udp.AllNodes() {
			if filter(rec) {
				select {
				case randomNodesCh <- rec:
					continue
				case <-ctx.Done():
					return
				}
			}
		}
	}()

	pstore := n.Host().Peerstore()
	for {
		select {
		case <-ctx.Done():
			log.Info("stopped peer discovery")
			return // no ctx error, expected close
		case found := <-randomNodesCh:
			var dat OpStackENRData
			if err := found.Load(&dat); err != nil { // we already filtered on chain ID and version
				continue
			}
			info, pub, err := enrToAddrInfo(found)
			if err != nil {
				continue
			}

			// record metadata to the peerstore if it is an extended peerstore
			if eps, ok := pstore.(store.ExtendedPeerstore); ok {
				_, err := eps.SetPeerMetadata(info.ID, store.PeerMetadata{
					ENR:       found.String(),
					OPStackID: dat.chainID,
				})
				if err != nil {
					log.Warn("failed to set peer metadata", "peer", info.ID, "err", err)
				}
			}
			// We add the addresses to the peerstore, and update the address TTL.
			// After that we stop using the address, assuming it may not be valid anymore (until we rediscover the node)
			pstore.AddAddrs(info.ID, info.Addrs, discoveredAddrTTL)
			_ = pstore.AddPubKey(info.ID, pub)

			// Tag the peer; we'd rather have the connection manager prune away old peers,
			// or peers on different chains, or anyone we have not seen via discovery.
			// There is no tag score decay yet, so just set it to 42.
			n.ConnectionManager().TagPeer(info.ID, fmt.Sprintf("opstack-%d-%d", dat.chainID, dat.version), 42)
			log.Debug("discovered peer", "peer", info.ID, "nodeID", found.ID(), "addr", info.Addrs[0])
		case <-connectTicker.C:
			connected := n.Host().Network().Peers()
			log.Debug("peering tick", "connected", len(connected),
				"advertised_udp", n.dv5Local.Node().UDP(),
				"advertised_tcp", n.dv5Local.Node().TCP(),
				"advertised_ip", n.dv5Local.Node().IP())
			if uint(len(connected)) < connectGoal {
				// Start looking for more peers more actively again
				faster()

				peersWithAddrs := n.Host().Peerstore().PeersWithAddrs()
				if err := shufflePeers(peersWithAddrs); err != nil {
					continue
				}

				existing := make(map[peer.ID]struct{})
				for _, p := range connected {
					existing[p] = struct{}{}
				}

				// Keep using these peers, and don't try new discovery/connections.
				// We don't need to search for more peers and try new connections if we already have plenty
				ctx, cancel := context.WithTimeout(ctx, collectiveDialTimeout)
			peerLoop:
				for _, id := range peersWithAddrs {
					// never dial ourselves
					if n.Host().ID() == id {
						continue
					}
					// skip peers that we are already connected to
					if _, ok := existing[id]; ok {
						continue
					}
					// skip peers that we recently failed to connect to
					if n.Host().Network().Connectedness(id) == network.CannotConnect {
						continue
					}
					// schedule, if there is still space to schedule (this may block)
					select {
					case connAttempts <- id:
					case <-ctx.Done():
						break peerLoop
					}
				}
				cancel()
			} else {
				// we have enough connections, slow down actively filling the peerstore
				slower()
			}
		}
	}
}

// shuffle the slice of peer IDs in-place with a RNG seeded by secure randomness.
func shufflePeers(ids peer.IDSlice) error {
	var x [8]byte // shuffling is not critical, just need to avoid basic predictability by outside peers
	if _, err := io.ReadFull(secureRand.Reader, x[:]); err != nil {
		return err
	}
	rng := rand.New(rand.NewSource(int64(binary.LittleEndian.Uint64(x[:]))))
	rng.Shuffle(len(ids), ids.Swap)
	return nil
}