github.com/aquanetwork/aquachain@v1.7.8/p2p/simulations/network.go (about) 1 // Copyright 2017 The aquachain Authors 2 // This file is part of the aquachain library. 3 // 4 // The aquachain library is free software: you can redistribute it and/or modify 5 // it under the terms of the GNU Lesser General Public License as published by 6 // the Free Software Foundation, either version 3 of the License, or 7 // (at your option) any later version. 8 // 9 // The aquachain library is distributed in the hope that it will be useful, 10 // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 // GNU Lesser General Public License for more details. 13 // 14 // You should have received a copy of the GNU Lesser General Public License 15 // along with the aquachain library. If not, see <http://www.gnu.org/licenses/>. 16 17 package simulations 18 19 import ( 20 "bytes" 21 "context" 22 "encoding/json" 23 "fmt" 24 "sync" 25 "time" 26 27 "gitlab.com/aquachain/aquachain/aqua/event" 28 "gitlab.com/aquachain/aquachain/common/log" 29 "gitlab.com/aquachain/aquachain/p2p" 30 "gitlab.com/aquachain/aquachain/p2p/discover" 31 "gitlab.com/aquachain/aquachain/p2p/simulations/adapters" 32 ) 33 34 var dialBanTimeout = 200 * time.Millisecond 35 36 // NetworkConfig defines configuration options for starting a Network 37 type NetworkConfig struct { 38 ID string `json:"id"` 39 DefaultService string `json:"default_service,omitempty"` 40 } 41 42 // Network models a p2p simulation network which consists of a collection of 43 // simulated nodes and the connections which exist between them. 44 // 45 // The Network has a single NodeAdapter which is responsible for actually 46 // starting nodes and connecting them together. 47 // 48 // The Network emits events when nodes are started and stopped, when they are 49 // connected and disconnected, and also when messages are sent between nodes. 50 type Network struct { 51 NetworkConfig 52 53 Nodes []*Node `json:"nodes"` 54 nodeMap map[discover.NodeID]int 55 56 Conns []*Conn `json:"conns"` 57 connMap map[string]int 58 59 nodeAdapter adapters.NodeAdapter 60 events event.Feed 61 lock sync.RWMutex 62 quitc chan struct{} 63 } 64 65 // NewNetwork returns a Network which uses the given NodeAdapter and NetworkConfig 66 func NewNetwork(nodeAdapter adapters.NodeAdapter, conf *NetworkConfig) *Network { 67 return &Network{ 68 NetworkConfig: *conf, 69 nodeAdapter: nodeAdapter, 70 nodeMap: make(map[discover.NodeID]int), 71 connMap: make(map[string]int), 72 quitc: make(chan struct{}), 73 } 74 } 75 76 // Events returns the output event feed of the Network. 77 func (self *Network) Events() *event.Feed { 78 return &self.events 79 } 80 81 // NewNode adds a new node to the network with a random ID 82 func (self *Network) NewNode() (*Node, error) { 83 conf := adapters.RandomNodeConfig() 84 conf.Services = []string{self.DefaultService} 85 return self.NewNodeWithConfig(conf) 86 } 87 88 // NewNodeWithConfig adds a new node to the network with the given config, 89 // returning an error if a node with the same ID or name already exists 90 func (self *Network) NewNodeWithConfig(conf *adapters.NodeConfig) (*Node, error) { 91 self.lock.Lock() 92 defer self.lock.Unlock() 93 94 // create a random ID and PrivateKey if not set 95 if conf.ID == (discover.NodeID{}) { 96 c := adapters.RandomNodeConfig() 97 conf.ID = c.ID 98 conf.PrivateKey = c.PrivateKey 99 } 100 id := conf.ID 101 if conf.Reachable == nil { 102 conf.Reachable = func(otherID discover.NodeID) bool { 103 _, err := self.InitConn(conf.ID, otherID) 104 return err == nil 105 } 106 } 107 108 // assign a name to the node if not set 109 if conf.Name == "" { 110 conf.Name = fmt.Sprintf("node%02d", len(self.Nodes)+1) 111 } 112 113 // check the node doesn't already exist 114 if node := self.getNode(id); node != nil { 115 return nil, fmt.Errorf("node with ID %q already exists", id) 116 } 117 if node := self.getNodeByName(conf.Name); node != nil { 118 return nil, fmt.Errorf("node with name %q already exists", conf.Name) 119 } 120 121 // if no services are configured, use the default service 122 if len(conf.Services) == 0 { 123 conf.Services = []string{self.DefaultService} 124 } 125 126 // use the NodeAdapter to create the node 127 adapterNode, err := self.nodeAdapter.NewNode(conf) 128 if err != nil { 129 return nil, err 130 } 131 node := &Node{ 132 Node: adapterNode, 133 Config: conf, 134 } 135 log.Trace(fmt.Sprintf("node %v created", id)) 136 self.nodeMap[id] = len(self.Nodes) 137 self.Nodes = append(self.Nodes, node) 138 139 // emit a "control" event 140 self.events.Send(ControlEvent(node)) 141 142 return node, nil 143 } 144 145 // Config returns the network configuration 146 func (self *Network) Config() *NetworkConfig { 147 return &self.NetworkConfig 148 } 149 150 // StartAll starts all nodes in the network 151 func (self *Network) StartAll() error { 152 for _, node := range self.Nodes { 153 if node.Up { 154 continue 155 } 156 if err := self.Start(node.ID()); err != nil { 157 return err 158 } 159 } 160 return nil 161 } 162 163 // StopAll stops all nodes in the network 164 func (self *Network) StopAll() error { 165 for _, node := range self.Nodes { 166 if !node.Up { 167 continue 168 } 169 if err := self.Stop(node.ID()); err != nil { 170 return err 171 } 172 } 173 return nil 174 } 175 176 // Start starts the node with the given ID 177 func (self *Network) Start(id discover.NodeID) error { 178 return self.startWithSnapshots(id, nil) 179 } 180 181 // startWithSnapshots starts the node with the given ID using the give 182 // snapshots 183 func (self *Network) startWithSnapshots(id discover.NodeID, snapshots map[string][]byte) error { 184 node := self.GetNode(id) 185 if node == nil { 186 return fmt.Errorf("node %v does not exist", id) 187 } 188 if node.Up { 189 return fmt.Errorf("node %v already up", id) 190 } 191 log.Trace(fmt.Sprintf("starting node %v: %v using %v", id, node.Up, self.nodeAdapter.Name())) 192 if err := node.Start(snapshots); err != nil { 193 log.Warn(fmt.Sprintf("start up failed: %v", err)) 194 return err 195 } 196 node.Up = true 197 log.Info(fmt.Sprintf("started node %v: %v", id, node.Up)) 198 199 self.events.Send(NewEvent(node)) 200 201 // subscribe to peer events 202 client, err := node.Client() 203 if err != nil { 204 return fmt.Errorf("error getting rpc client for node %v: %s", id, err) 205 } 206 events := make(chan *p2p.PeerEvent) 207 sub, err := client.Subscribe(context.Background(), "admin", events, "peerEvents") 208 if err != nil { 209 return fmt.Errorf("error getting peer events for node %v: %s", id, err) 210 } 211 go self.watchPeerEvents(id, events, sub) 212 return nil 213 } 214 215 // watchPeerEvents reads peer events from the given channel and emits 216 // corresponding network events 217 func (self *Network) watchPeerEvents(id discover.NodeID, events chan *p2p.PeerEvent, sub event.Subscription) { 218 defer func() { 219 sub.Unsubscribe() 220 221 // assume the node is now down 222 self.lock.Lock() 223 node := self.getNode(id) 224 node.Up = false 225 self.lock.Unlock() 226 self.events.Send(NewEvent(node)) 227 }() 228 for { 229 select { 230 case event, ok := <-events: 231 if !ok { 232 return 233 } 234 peer := event.Peer 235 switch event.Type { 236 237 case p2p.PeerEventTypeAdd: 238 self.DidConnect(id, peer) 239 240 case p2p.PeerEventTypeDrop: 241 self.DidDisconnect(id, peer) 242 243 case p2p.PeerEventTypeMsgSend: 244 self.DidSend(id, peer, event.Protocol, *event.MsgCode) 245 246 case p2p.PeerEventTypeMsgRecv: 247 self.DidReceive(peer, id, event.Protocol, *event.MsgCode) 248 249 } 250 251 case err := <-sub.Err(): 252 if err != nil { 253 log.Error(fmt.Sprintf("error getting peer events for node %v", id), "err", err) 254 } 255 return 256 } 257 } 258 } 259 260 // Stop stops the node with the given ID 261 func (self *Network) Stop(id discover.NodeID) error { 262 node := self.GetNode(id) 263 if node == nil { 264 return fmt.Errorf("node %v does not exist", id) 265 } 266 if !node.Up { 267 return fmt.Errorf("node %v already down", id) 268 } 269 if err := node.Stop(); err != nil { 270 return err 271 } 272 node.Up = false 273 log.Info(fmt.Sprintf("stop node %v: %v", id, node.Up)) 274 275 self.events.Send(ControlEvent(node)) 276 return nil 277 } 278 279 // Connect connects two nodes together by calling the "admin_addPeer" RPC 280 // method on the "one" node so that it connects to the "other" node 281 func (self *Network) Connect(oneID, otherID discover.NodeID) error { 282 log.Debug(fmt.Sprintf("connecting %s to %s", oneID, otherID)) 283 conn, err := self.InitConn(oneID, otherID) 284 if err != nil { 285 return err 286 } 287 client, err := conn.one.Client() 288 if err != nil { 289 return err 290 } 291 self.events.Send(ControlEvent(conn)) 292 return client.Call(nil, "admin_addPeer", string(conn.other.Addr())) 293 } 294 295 // Disconnect disconnects two nodes by calling the "admin_removePeer" RPC 296 // method on the "one" node so that it disconnects from the "other" node 297 func (self *Network) Disconnect(oneID, otherID discover.NodeID) error { 298 conn := self.GetConn(oneID, otherID) 299 if conn == nil { 300 return fmt.Errorf("connection between %v and %v does not exist", oneID, otherID) 301 } 302 if !conn.Up { 303 return fmt.Errorf("%v and %v already disconnected", oneID, otherID) 304 } 305 client, err := conn.one.Client() 306 if err != nil { 307 return err 308 } 309 self.events.Send(ControlEvent(conn)) 310 return client.Call(nil, "admin_removePeer", string(conn.other.Addr())) 311 } 312 313 // DidConnect tracks the fact that the "one" node connected to the "other" node 314 func (self *Network) DidConnect(one, other discover.NodeID) error { 315 conn, err := self.GetOrCreateConn(one, other) 316 if err != nil { 317 return fmt.Errorf("connection between %v and %v does not exist", one, other) 318 } 319 if conn.Up { 320 return fmt.Errorf("%v and %v already connected", one, other) 321 } 322 conn.Up = true 323 self.events.Send(NewEvent(conn)) 324 return nil 325 } 326 327 // DidDisconnect tracks the fact that the "one" node disconnected from the 328 // "other" node 329 func (self *Network) DidDisconnect(one, other discover.NodeID) error { 330 conn := self.GetConn(one, other) 331 if conn == nil { 332 return fmt.Errorf("connection between %v and %v does not exist", one, other) 333 } 334 if !conn.Up { 335 return fmt.Errorf("%v and %v already disconnected", one, other) 336 } 337 conn.Up = false 338 conn.initiated = time.Now().Add(-dialBanTimeout) 339 self.events.Send(NewEvent(conn)) 340 return nil 341 } 342 343 // DidSend tracks the fact that "sender" sent a message to "receiver" 344 func (self *Network) DidSend(sender, receiver discover.NodeID, proto string, code uint64) error { 345 msg := &Msg{ 346 One: sender, 347 Other: receiver, 348 Protocol: proto, 349 Code: code, 350 Received: false, 351 } 352 self.events.Send(NewEvent(msg)) 353 return nil 354 } 355 356 // DidReceive tracks the fact that "receiver" received a message from "sender" 357 func (self *Network) DidReceive(sender, receiver discover.NodeID, proto string, code uint64) error { 358 msg := &Msg{ 359 One: sender, 360 Other: receiver, 361 Protocol: proto, 362 Code: code, 363 Received: true, 364 } 365 self.events.Send(NewEvent(msg)) 366 return nil 367 } 368 369 // GetNode gets the node with the given ID, returning nil if the node does not 370 // exist 371 func (self *Network) GetNode(id discover.NodeID) *Node { 372 self.lock.Lock() 373 defer self.lock.Unlock() 374 return self.getNode(id) 375 } 376 377 // GetNode gets the node with the given name, returning nil if the node does 378 // not exist 379 func (self *Network) GetNodeByName(name string) *Node { 380 self.lock.Lock() 381 defer self.lock.Unlock() 382 return self.getNodeByName(name) 383 } 384 385 func (self *Network) getNode(id discover.NodeID) *Node { 386 i, found := self.nodeMap[id] 387 if !found { 388 return nil 389 } 390 return self.Nodes[i] 391 } 392 393 func (self *Network) getNodeByName(name string) *Node { 394 for _, node := range self.Nodes { 395 if node.Config.Name == name { 396 return node 397 } 398 } 399 return nil 400 } 401 402 // GetNodes returns the existing nodes 403 func (self *Network) GetNodes() (nodes []*Node) { 404 self.lock.Lock() 405 defer self.lock.Unlock() 406 407 nodes = append(nodes, self.Nodes...) 408 return nodes 409 } 410 411 // GetConn returns the connection which exists between "one" and "other" 412 // regardless of which node initiated the connection 413 func (self *Network) GetConn(oneID, otherID discover.NodeID) *Conn { 414 self.lock.Lock() 415 defer self.lock.Unlock() 416 return self.getConn(oneID, otherID) 417 } 418 419 // GetOrCreateConn is like GetConn but creates the connection if it doesn't 420 // already exist 421 func (self *Network) GetOrCreateConn(oneID, otherID discover.NodeID) (*Conn, error) { 422 self.lock.Lock() 423 defer self.lock.Unlock() 424 return self.getOrCreateConn(oneID, otherID) 425 } 426 427 func (self *Network) getOrCreateConn(oneID, otherID discover.NodeID) (*Conn, error) { 428 if conn := self.getConn(oneID, otherID); conn != nil { 429 return conn, nil 430 } 431 432 one := self.getNode(oneID) 433 if one == nil { 434 return nil, fmt.Errorf("node %v does not exist", oneID) 435 } 436 other := self.getNode(otherID) 437 if other == nil { 438 return nil, fmt.Errorf("node %v does not exist", otherID) 439 } 440 conn := &Conn{ 441 One: oneID, 442 Other: otherID, 443 one: one, 444 other: other, 445 } 446 label := ConnLabel(oneID, otherID) 447 self.connMap[label] = len(self.Conns) 448 self.Conns = append(self.Conns, conn) 449 return conn, nil 450 } 451 452 func (self *Network) getConn(oneID, otherID discover.NodeID) *Conn { 453 label := ConnLabel(oneID, otherID) 454 i, found := self.connMap[label] 455 if !found { 456 return nil 457 } 458 return self.Conns[i] 459 } 460 461 // InitConn(one, other) retrieves the connectiton model for the connection between 462 // peers one and other, or creates a new one if it does not exist 463 // the order of nodes does not matter, i.e., Conn(i,j) == Conn(j, i) 464 // it checks if the connection is already up, and if the nodes are running 465 // NOTE: 466 // it also checks whether there has been recent attempt to connect the peers 467 // this is cheating as the simulation is used as an oracle and know about 468 // remote peers attempt to connect to a node which will then not initiate the connection 469 func (self *Network) InitConn(oneID, otherID discover.NodeID) (*Conn, error) { 470 self.lock.Lock() 471 defer self.lock.Unlock() 472 if oneID == otherID { 473 return nil, fmt.Errorf("refusing to connect to self %v", oneID) 474 } 475 conn, err := self.getOrCreateConn(oneID, otherID) 476 if err != nil { 477 return nil, err 478 } 479 if time.Since(conn.initiated) < dialBanTimeout { 480 return nil, fmt.Errorf("connection between %v and %v recently attempted", oneID, otherID) 481 } 482 if conn.Up { 483 return nil, fmt.Errorf("%v and %v already connected", oneID, otherID) 484 } 485 err = conn.nodesUp() 486 if err != nil { 487 return nil, fmt.Errorf("nodes not up: %v", err) 488 } 489 conn.initiated = time.Now() 490 return conn, nil 491 } 492 493 // Shutdown stops all nodes in the network and closes the quit channel 494 func (self *Network) Shutdown() { 495 for _, node := range self.Nodes { 496 log.Debug(fmt.Sprintf("stopping node %s", node.ID().TerminalString())) 497 if err := node.Stop(); err != nil { 498 log.Warn(fmt.Sprintf("error stopping node %s", node.ID().TerminalString()), "err", err) 499 } 500 } 501 close(self.quitc) 502 } 503 504 //Reset resets all network properties: 505 //emtpies the nodes and the connection list 506 func (self *Network) Reset() { 507 self.lock.Lock() 508 defer self.lock.Unlock() 509 510 //re-initialize the maps 511 self.connMap = make(map[string]int) 512 self.nodeMap = make(map[discover.NodeID]int) 513 514 self.Nodes = nil 515 self.Conns = nil 516 } 517 518 // Node is a wrapper around adapters.Node which is used to track the status 519 // of a node in the network 520 type Node struct { 521 adapters.Node `json:"-"` 522 523 // Config if the config used to created the node 524 Config *adapters.NodeConfig `json:"config"` 525 526 // Up tracks whether or not the node is running 527 Up bool `json:"up"` 528 } 529 530 // ID returns the ID of the node 531 func (self *Node) ID() discover.NodeID { 532 return self.Config.ID 533 } 534 535 // String returns a log-friendly string 536 func (self *Node) String() string { 537 return fmt.Sprintf("Node %v", self.ID().TerminalString()) 538 } 539 540 // NodeInfo returns information about the node 541 func (self *Node) NodeInfo() *p2p.NodeInfo { 542 // avoid a panic if the node is not started yet 543 if self.Node == nil { 544 return nil 545 } 546 info := self.Node.NodeInfo() 547 info.Name = self.Config.Name 548 return info 549 } 550 551 // MarshalJSON implements the json.Marshaler interface so that the encoded 552 // JSON includes the NodeInfo 553 func (self *Node) MarshalJSON() ([]byte, error) { 554 return json.Marshal(struct { 555 Info *p2p.NodeInfo `json:"info,omitempty"` 556 Config *adapters.NodeConfig `json:"config,omitempty"` 557 Up bool `json:"up"` 558 }{ 559 Info: self.NodeInfo(), 560 Config: self.Config, 561 Up: self.Up, 562 }) 563 } 564 565 // Conn represents a connection between two nodes in the network 566 type Conn struct { 567 // One is the node which initiated the connection 568 One discover.NodeID `json:"one"` 569 570 // Other is the node which the connection was made to 571 Other discover.NodeID `json:"other"` 572 573 // Up tracks whether or not the connection is active 574 Up bool `json:"up"` 575 // Registers when the connection was grabbed to dial 576 initiated time.Time 577 578 one *Node 579 other *Node 580 } 581 582 // nodesUp returns whether both nodes are currently up 583 func (self *Conn) nodesUp() error { 584 if !self.one.Up { 585 return fmt.Errorf("one %v is not up", self.One) 586 } 587 if !self.other.Up { 588 return fmt.Errorf("other %v is not up", self.Other) 589 } 590 return nil 591 } 592 593 // String returns a log-friendly string 594 func (self *Conn) String() string { 595 return fmt.Sprintf("Conn %v->%v", self.One.TerminalString(), self.Other.TerminalString()) 596 } 597 598 // Msg represents a p2p message sent between two nodes in the network 599 type Msg struct { 600 One discover.NodeID `json:"one"` 601 Other discover.NodeID `json:"other"` 602 Protocol string `json:"protocol"` 603 Code uint64 `json:"code"` 604 Received bool `json:"received"` 605 } 606 607 // String returns a log-friendly string 608 func (self *Msg) String() string { 609 return fmt.Sprintf("Msg(%d) %v->%v", self.Code, self.One.TerminalString(), self.Other.TerminalString()) 610 } 611 612 // ConnLabel generates a deterministic string which represents a connection 613 // between two nodes, used to compare if two connections are between the same 614 // nodes 615 func ConnLabel(source, target discover.NodeID) string { 616 var first, second discover.NodeID 617 if bytes.Compare(source.Bytes(), target.Bytes()) > 0 { 618 first = target 619 second = source 620 } else { 621 first = source 622 second = target 623 } 624 return fmt.Sprintf("%v-%v", first, second) 625 } 626 627 // Snapshot represents the state of a network at a single point in time and can 628 // be used to restore the state of a network 629 type Snapshot struct { 630 Nodes []NodeSnapshot `json:"nodes,omitempty"` 631 Conns []Conn `json:"conns,omitempty"` 632 } 633 634 // NodeSnapshot represents the state of a node in the network 635 type NodeSnapshot struct { 636 Node Node `json:"node,omitempty"` 637 638 // Snapshots is arbitrary data gathered from calling node.Snapshots() 639 Snapshots map[string][]byte `json:"snapshots,omitempty"` 640 } 641 642 // Snapshot creates a network snapshot 643 func (self *Network) Snapshot() (*Snapshot, error) { 644 self.lock.Lock() 645 defer self.lock.Unlock() 646 snap := &Snapshot{ 647 Nodes: make([]NodeSnapshot, len(self.Nodes)), 648 Conns: make([]Conn, len(self.Conns)), 649 } 650 for i, node := range self.Nodes { 651 snap.Nodes[i] = NodeSnapshot{Node: *node} 652 if !node.Up { 653 continue 654 } 655 snapshots, err := node.Snapshots() 656 if err != nil { 657 return nil, err 658 } 659 snap.Nodes[i].Snapshots = snapshots 660 } 661 for i, conn := range self.Conns { 662 snap.Conns[i] = *conn 663 } 664 return snap, nil 665 } 666 667 // Load loads a network snapshot 668 func (self *Network) Load(snap *Snapshot) error { 669 for _, n := range snap.Nodes { 670 if _, err := self.NewNodeWithConfig(n.Node.Config); err != nil { 671 return err 672 } 673 if !n.Node.Up { 674 continue 675 } 676 if err := self.startWithSnapshots(n.Node.Config.ID, n.Snapshots); err != nil { 677 return err 678 } 679 } 680 for _, conn := range snap.Conns { 681 682 if !self.GetNode(conn.One).Up || !self.GetNode(conn.Other).Up { 683 //in this case, at least one of the nodes of a connection is not up, 684 //so it would result in the snapshot `Load` to fail 685 continue 686 } 687 if err := self.Connect(conn.One, conn.Other); err != nil { 688 return err 689 } 690 } 691 return nil 692 } 693 694 // Subscribe reads control events from a channel and executes them 695 func (self *Network) Subscribe(events chan *Event) { 696 for { 697 select { 698 case event, ok := <-events: 699 if !ok { 700 return 701 } 702 if event.Control { 703 self.executeControlEvent(event) 704 } 705 case <-self.quitc: 706 return 707 } 708 } 709 } 710 711 func (self *Network) executeControlEvent(event *Event) { 712 log.Trace("execute control event", "type", event.Type, "event", event) 713 switch event.Type { 714 case EventTypeNode: 715 if err := self.executeNodeEvent(event); err != nil { 716 log.Error("error executing node event", "event", event, "err", err) 717 } 718 case EventTypeConn: 719 if err := self.executeConnEvent(event); err != nil { 720 log.Error("error executing conn event", "event", event, "err", err) 721 } 722 case EventTypeMsg: 723 log.Warn("ignoring control msg event") 724 } 725 } 726 727 func (self *Network) executeNodeEvent(e *Event) error { 728 if !e.Node.Up { 729 return self.Stop(e.Node.ID()) 730 } 731 732 if _, err := self.NewNodeWithConfig(e.Node.Config); err != nil { 733 return err 734 } 735 return self.Start(e.Node.ID()) 736 } 737 738 func (self *Network) executeConnEvent(e *Event) error { 739 if e.Conn.Up { 740 return self.Connect(e.Conn.One, e.Conn.Other) 741 } else { 742 return self.Disconnect(e.Conn.One, e.Conn.Other) 743 } 744 }