github.com/sberex/go-sberex@v1.8.2-0.20181113200658-ed96ac38f7d7/p2p/simulations/network.go (about) 1 // This file is part of the go-sberex library. The go-sberex library is 2 // free software: you can redistribute it and/or modify it under the terms 3 // of the GNU Lesser General Public License as published by the Free 4 // Software Foundation, either version 3 of the License, or (at your option) 5 // any later version. 6 // 7 // The go-sberex library is distributed in the hope that it will be useful, 8 // but WITHOUT ANY WARRANTY; without even the implied warranty of 9 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser 10 // General Public License <http://www.gnu.org/licenses/> for more details. 11 12 package simulations 13 14 import ( 15 "bytes" 16 "context" 17 "encoding/json" 18 "fmt" 19 "sync" 20 "time" 21 22 "github.com/Sberex/go-sberex/event" 23 "github.com/Sberex/go-sberex/log" 24 "github.com/Sberex/go-sberex/p2p" 25 "github.com/Sberex/go-sberex/p2p/discover" 26 "github.com/Sberex/go-sberex/p2p/simulations/adapters" 27 ) 28 29 var dialBanTimeout = 200 * time.Millisecond 30 31 // NetworkConfig defines configuration options for starting a Network 32 type NetworkConfig struct { 33 ID string `json:"id"` 34 DefaultService string `json:"default_service,omitempty"` 35 } 36 37 // Network models a p2p simulation network which consists of a collection of 38 // simulated nodes and the connections which exist between them. 39 // 40 // The Network has a single NodeAdapter which is responsible for actually 41 // starting nodes and connecting them together. 42 // 43 // The Network emits events when nodes are started and stopped, when they are 44 // connected and disconnected, and also when messages are sent between nodes. 45 type Network struct { 46 NetworkConfig 47 48 Nodes []*Node `json:"nodes"` 49 nodeMap map[discover.NodeID]int 50 51 Conns []*Conn `json:"conns"` 52 connMap map[string]int 53 54 nodeAdapter adapters.NodeAdapter 55 events event.Feed 56 lock sync.RWMutex 57 quitc chan struct{} 58 } 59 60 // NewNetwork returns a Network which uses the given NodeAdapter and NetworkConfig 61 func NewNetwork(nodeAdapter adapters.NodeAdapter, conf *NetworkConfig) *Network { 62 return &Network{ 63 NetworkConfig: *conf, 64 nodeAdapter: nodeAdapter, 65 nodeMap: make(map[discover.NodeID]int), 66 connMap: make(map[string]int), 67 quitc: make(chan struct{}), 68 } 69 } 70 71 // Events returns the output event feed of the Network. 72 func (self *Network) Events() *event.Feed { 73 return &self.events 74 } 75 76 // NewNode adds a new node to the network with a random ID 77 func (self *Network) NewNode() (*Node, error) { 78 conf := adapters.RandomNodeConfig() 79 conf.Services = []string{self.DefaultService} 80 return self.NewNodeWithConfig(conf) 81 } 82 83 // NewNodeWithConfig adds a new node to the network with the given config, 84 // returning an error if a node with the same ID or name already exists 85 func (self *Network) NewNodeWithConfig(conf *adapters.NodeConfig) (*Node, error) { 86 self.lock.Lock() 87 defer self.lock.Unlock() 88 89 // create a random ID and PrivateKey if not set 90 if conf.ID == (discover.NodeID{}) { 91 c := adapters.RandomNodeConfig() 92 conf.ID = c.ID 93 conf.PrivateKey = c.PrivateKey 94 } 95 id := conf.ID 96 if conf.Reachable == nil { 97 conf.Reachable = func(otherID discover.NodeID) bool { 98 _, err := self.InitConn(conf.ID, otherID) 99 return err == nil 100 } 101 } 102 103 // assign a name to the node if not set 104 if conf.Name == "" { 105 conf.Name = fmt.Sprintf("node%02d", len(self.Nodes)+1) 106 } 107 108 // check the node doesn't already exist 109 if node := self.getNode(id); node != nil { 110 return nil, fmt.Errorf("node with ID %q already exists", id) 111 } 112 if node := self.getNodeByName(conf.Name); node != nil { 113 return nil, fmt.Errorf("node with name %q already exists", conf.Name) 114 } 115 116 // if no services are configured, use the default service 117 if len(conf.Services) == 0 { 118 conf.Services = []string{self.DefaultService} 119 } 120 121 // use the NodeAdapter to create the node 122 adapterNode, err := self.nodeAdapter.NewNode(conf) 123 if err != nil { 124 return nil, err 125 } 126 node := &Node{ 127 Node: adapterNode, 128 Config: conf, 129 } 130 log.Trace(fmt.Sprintf("node %v created", id)) 131 self.nodeMap[id] = len(self.Nodes) 132 self.Nodes = append(self.Nodes, node) 133 134 // emit a "control" event 135 self.events.Send(ControlEvent(node)) 136 137 return node, nil 138 } 139 140 // Config returns the network configuration 141 func (self *Network) Config() *NetworkConfig { 142 return &self.NetworkConfig 143 } 144 145 // StartAll starts all nodes in the network 146 func (self *Network) StartAll() error { 147 for _, node := range self.Nodes { 148 if node.Up { 149 continue 150 } 151 if err := self.Start(node.ID()); err != nil { 152 return err 153 } 154 } 155 return nil 156 } 157 158 // StopAll stops all nodes in the network 159 func (self *Network) StopAll() error { 160 for _, node := range self.Nodes { 161 if !node.Up { 162 continue 163 } 164 if err := self.Stop(node.ID()); err != nil { 165 return err 166 } 167 } 168 return nil 169 } 170 171 // Start starts the node with the given ID 172 func (self *Network) Start(id discover.NodeID) error { 173 return self.startWithSnapshots(id, nil) 174 } 175 176 // startWithSnapshots starts the node with the given ID using the give 177 // snapshots 178 func (self *Network) startWithSnapshots(id discover.NodeID, snapshots map[string][]byte) error { 179 node := self.GetNode(id) 180 if node == nil { 181 return fmt.Errorf("node %v does not exist", id) 182 } 183 if node.Up { 184 return fmt.Errorf("node %v already up", id) 185 } 186 log.Trace(fmt.Sprintf("starting node %v: %v using %v", id, node.Up, self.nodeAdapter.Name())) 187 if err := node.Start(snapshots); err != nil { 188 log.Warn(fmt.Sprintf("start up failed: %v", err)) 189 return err 190 } 191 node.Up = true 192 log.Info(fmt.Sprintf("started node %v: %v", id, node.Up)) 193 194 self.events.Send(NewEvent(node)) 195 196 // subscribe to peer events 197 client, err := node.Client() 198 if err != nil { 199 return fmt.Errorf("error getting rpc client for node %v: %s", id, err) 200 } 201 events := make(chan *p2p.PeerEvent) 202 sub, err := client.Subscribe(context.Background(), "admin", events, "peerEvents") 203 if err != nil { 204 return fmt.Errorf("error getting peer events for node %v: %s", id, err) 205 } 206 go self.watchPeerEvents(id, events, sub) 207 return nil 208 } 209 210 // watchPeerEvents reads peer events from the given channel and emits 211 // corresponding network events 212 func (self *Network) watchPeerEvents(id discover.NodeID, events chan *p2p.PeerEvent, sub event.Subscription) { 213 defer func() { 214 sub.Unsubscribe() 215 216 // assume the node is now down 217 self.lock.Lock() 218 node := self.getNode(id) 219 node.Up = false 220 self.lock.Unlock() 221 self.events.Send(NewEvent(node)) 222 }() 223 for { 224 select { 225 case event, ok := <-events: 226 if !ok { 227 return 228 } 229 peer := event.Peer 230 switch event.Type { 231 232 case p2p.PeerEventTypeAdd: 233 self.DidConnect(id, peer) 234 235 case p2p.PeerEventTypeDrop: 236 self.DidDisconnect(id, peer) 237 238 case p2p.PeerEventTypeMsgSend: 239 self.DidSend(id, peer, event.Protocol, *event.MsgCode) 240 241 case p2p.PeerEventTypeMsgRecv: 242 self.DidReceive(peer, id, event.Protocol, *event.MsgCode) 243 244 } 245 246 case err := <-sub.Err(): 247 if err != nil { 248 log.Error(fmt.Sprintf("error getting peer events for node %v", id), "err", err) 249 } 250 return 251 } 252 } 253 } 254 255 // Stop stops the node with the given ID 256 func (self *Network) Stop(id discover.NodeID) error { 257 node := self.GetNode(id) 258 if node == nil { 259 return fmt.Errorf("node %v does not exist", id) 260 } 261 if !node.Up { 262 return fmt.Errorf("node %v already down", id) 263 } 264 if err := node.Stop(); err != nil { 265 return err 266 } 267 node.Up = false 268 log.Info(fmt.Sprintf("stop node %v: %v", id, node.Up)) 269 270 self.events.Send(ControlEvent(node)) 271 return nil 272 } 273 274 // Connect connects two nodes together by calling the "admin_addPeer" RPC 275 // method on the "one" node so that it connects to the "other" node 276 func (self *Network) Connect(oneID, otherID discover.NodeID) error { 277 log.Debug(fmt.Sprintf("connecting %s to %s", oneID, otherID)) 278 conn, err := self.InitConn(oneID, otherID) 279 if err != nil { 280 return err 281 } 282 client, err := conn.one.Client() 283 if err != nil { 284 return err 285 } 286 self.events.Send(ControlEvent(conn)) 287 return client.Call(nil, "admin_addPeer", string(conn.other.Addr())) 288 } 289 290 // Disconnect disconnects two nodes by calling the "admin_removePeer" RPC 291 // method on the "one" node so that it disconnects from the "other" node 292 func (self *Network) Disconnect(oneID, otherID discover.NodeID) error { 293 conn := self.GetConn(oneID, otherID) 294 if conn == nil { 295 return fmt.Errorf("connection between %v and %v does not exist", oneID, otherID) 296 } 297 if !conn.Up { 298 return fmt.Errorf("%v and %v already disconnected", oneID, otherID) 299 } 300 client, err := conn.one.Client() 301 if err != nil { 302 return err 303 } 304 self.events.Send(ControlEvent(conn)) 305 return client.Call(nil, "admin_removePeer", string(conn.other.Addr())) 306 } 307 308 // DidConnect tracks the fact that the "one" node connected to the "other" node 309 func (self *Network) DidConnect(one, other discover.NodeID) error { 310 conn, err := self.GetOrCreateConn(one, other) 311 if err != nil { 312 return fmt.Errorf("connection between %v and %v does not exist", one, other) 313 } 314 if conn.Up { 315 return fmt.Errorf("%v and %v already connected", one, other) 316 } 317 conn.Up = true 318 self.events.Send(NewEvent(conn)) 319 return nil 320 } 321 322 // DidDisconnect tracks the fact that the "one" node disconnected from the 323 // "other" node 324 func (self *Network) DidDisconnect(one, other discover.NodeID) error { 325 conn := self.GetConn(one, other) 326 if conn == nil { 327 return fmt.Errorf("connection between %v and %v does not exist", one, other) 328 } 329 if !conn.Up { 330 return fmt.Errorf("%v and %v already disconnected", one, other) 331 } 332 conn.Up = false 333 conn.initiated = time.Now().Add(-dialBanTimeout) 334 self.events.Send(NewEvent(conn)) 335 return nil 336 } 337 338 // DidSend tracks the fact that "sender" sent a message to "receiver" 339 func (self *Network) DidSend(sender, receiver discover.NodeID, proto string, code uint64) error { 340 msg := &Msg{ 341 One: sender, 342 Other: receiver, 343 Protocol: proto, 344 Code: code, 345 Received: false, 346 } 347 self.events.Send(NewEvent(msg)) 348 return nil 349 } 350 351 // DidReceive tracks the fact that "receiver" received a message from "sender" 352 func (self *Network) DidReceive(sender, receiver discover.NodeID, proto string, code uint64) error { 353 msg := &Msg{ 354 One: sender, 355 Other: receiver, 356 Protocol: proto, 357 Code: code, 358 Received: true, 359 } 360 self.events.Send(NewEvent(msg)) 361 return nil 362 } 363 364 // GetNode gets the node with the given ID, returning nil if the node does not 365 // exist 366 func (self *Network) GetNode(id discover.NodeID) *Node { 367 self.lock.Lock() 368 defer self.lock.Unlock() 369 return self.getNode(id) 370 } 371 372 // GetNode gets the node with the given name, returning nil if the node does 373 // not exist 374 func (self *Network) GetNodeByName(name string) *Node { 375 self.lock.Lock() 376 defer self.lock.Unlock() 377 return self.getNodeByName(name) 378 } 379 380 func (self *Network) getNode(id discover.NodeID) *Node { 381 i, found := self.nodeMap[id] 382 if !found { 383 return nil 384 } 385 return self.Nodes[i] 386 } 387 388 func (self *Network) getNodeByName(name string) *Node { 389 for _, node := range self.Nodes { 390 if node.Config.Name == name { 391 return node 392 } 393 } 394 return nil 395 } 396 397 // GetNodes returns the existing nodes 398 func (self *Network) GetNodes() (nodes []*Node) { 399 self.lock.Lock() 400 defer self.lock.Unlock() 401 402 nodes = append(nodes, self.Nodes...) 403 return nodes 404 } 405 406 // GetConn returns the connection which exists between "one" and "other" 407 // regardless of which node initiated the connection 408 func (self *Network) GetConn(oneID, otherID discover.NodeID) *Conn { 409 self.lock.Lock() 410 defer self.lock.Unlock() 411 return self.getConn(oneID, otherID) 412 } 413 414 // GetOrCreateConn is like GetConn but creates the connection if it doesn't 415 // already exist 416 func (self *Network) GetOrCreateConn(oneID, otherID discover.NodeID) (*Conn, error) { 417 self.lock.Lock() 418 defer self.lock.Unlock() 419 return self.getOrCreateConn(oneID, otherID) 420 } 421 422 func (self *Network) getOrCreateConn(oneID, otherID discover.NodeID) (*Conn, error) { 423 if conn := self.getConn(oneID, otherID); conn != nil { 424 return conn, nil 425 } 426 427 one := self.getNode(oneID) 428 if one == nil { 429 return nil, fmt.Errorf("node %v does not exist", oneID) 430 } 431 other := self.getNode(otherID) 432 if other == nil { 433 return nil, fmt.Errorf("node %v does not exist", otherID) 434 } 435 conn := &Conn{ 436 One: oneID, 437 Other: otherID, 438 one: one, 439 other: other, 440 } 441 label := ConnLabel(oneID, otherID) 442 self.connMap[label] = len(self.Conns) 443 self.Conns = append(self.Conns, conn) 444 return conn, nil 445 } 446 447 func (self *Network) getConn(oneID, otherID discover.NodeID) *Conn { 448 label := ConnLabel(oneID, otherID) 449 i, found := self.connMap[label] 450 if !found { 451 return nil 452 } 453 return self.Conns[i] 454 } 455 456 // InitConn(one, other) retrieves the connectiton model for the connection between 457 // peers one and other, or creates a new one if it does not exist 458 // the order of nodes does not matter, i.e., Conn(i,j) == Conn(j, i) 459 // it checks if the connection is already up, and if the nodes are running 460 // NOTE: 461 // it also checks whether there has been recent attempt to connect the peers 462 // this is cheating as the simulation is used as an oracle and know about 463 // remote peers attempt to connect to a node which will then not initiate the connection 464 func (self *Network) InitConn(oneID, otherID discover.NodeID) (*Conn, error) { 465 self.lock.Lock() 466 defer self.lock.Unlock() 467 if oneID == otherID { 468 return nil, fmt.Errorf("refusing to connect to self %v", oneID) 469 } 470 conn, err := self.getOrCreateConn(oneID, otherID) 471 if err != nil { 472 return nil, err 473 } 474 if time.Since(conn.initiated) < dialBanTimeout { 475 return nil, fmt.Errorf("connection between %v and %v recently attempted", oneID, otherID) 476 } 477 if conn.Up { 478 return nil, fmt.Errorf("%v and %v already connected", oneID, otherID) 479 } 480 err = conn.nodesUp() 481 if err != nil { 482 return nil, fmt.Errorf("nodes not up: %v", err) 483 } 484 conn.initiated = time.Now() 485 return conn, nil 486 } 487 488 // Shutdown stops all nodes in the network and closes the quit channel 489 func (self *Network) Shutdown() { 490 for _, node := range self.Nodes { 491 log.Debug(fmt.Sprintf("stopping node %s", node.ID().TerminalString())) 492 if err := node.Stop(); err != nil { 493 log.Warn(fmt.Sprintf("error stopping node %s", node.ID().TerminalString()), "err", err) 494 } 495 } 496 close(self.quitc) 497 } 498 499 //Reset resets all network properties: 500 //emtpies the nodes and the connection list 501 func (self *Network) Reset() { 502 self.lock.Lock() 503 defer self.lock.Unlock() 504 505 //re-initialize the maps 506 self.connMap = make(map[string]int) 507 self.nodeMap = make(map[discover.NodeID]int) 508 509 self.Nodes = nil 510 self.Conns = nil 511 } 512 513 // Node is a wrapper around adapters.Node which is used to track the status 514 // of a node in the network 515 type Node struct { 516 adapters.Node `json:"-"` 517 518 // Config if the config used to created the node 519 Config *adapters.NodeConfig `json:"config"` 520 521 // Up tracks whether or not the node is running 522 Up bool `json:"up"` 523 } 524 525 // ID returns the ID of the node 526 func (self *Node) ID() discover.NodeID { 527 return self.Config.ID 528 } 529 530 // String returns a log-friendly string 531 func (self *Node) String() string { 532 return fmt.Sprintf("Node %v", self.ID().TerminalString()) 533 } 534 535 // NodeInfo returns information about the node 536 func (self *Node) NodeInfo() *p2p.NodeInfo { 537 // avoid a panic if the node is not started yet 538 if self.Node == nil { 539 return nil 540 } 541 info := self.Node.NodeInfo() 542 info.Name = self.Config.Name 543 return info 544 } 545 546 // MarshalJSON implements the json.Marshaler interface so that the encoded 547 // JSON includes the NodeInfo 548 func (self *Node) MarshalJSON() ([]byte, error) { 549 return json.Marshal(struct { 550 Info *p2p.NodeInfo `json:"info,omitempty"` 551 Config *adapters.NodeConfig `json:"config,omitempty"` 552 Up bool `json:"up"` 553 }{ 554 Info: self.NodeInfo(), 555 Config: self.Config, 556 Up: self.Up, 557 }) 558 } 559 560 // Conn represents a connection between two nodes in the network 561 type Conn struct { 562 // One is the node which initiated the connection 563 One discover.NodeID `json:"one"` 564 565 // Other is the node which the connection was made to 566 Other discover.NodeID `json:"other"` 567 568 // Up tracks whether or not the connection is active 569 Up bool `json:"up"` 570 // Registers when the connection was grabbed to dial 571 initiated time.Time 572 573 one *Node 574 other *Node 575 } 576 577 // nodesUp returns whether both nodes are currently up 578 func (self *Conn) nodesUp() error { 579 if !self.one.Up { 580 return fmt.Errorf("one %v is not up", self.One) 581 } 582 if !self.other.Up { 583 return fmt.Errorf("other %v is not up", self.Other) 584 } 585 return nil 586 } 587 588 // String returns a log-friendly string 589 func (self *Conn) String() string { 590 return fmt.Sprintf("Conn %v->%v", self.One.TerminalString(), self.Other.TerminalString()) 591 } 592 593 // Msg represents a p2p message sent between two nodes in the network 594 type Msg struct { 595 One discover.NodeID `json:"one"` 596 Other discover.NodeID `json:"other"` 597 Protocol string `json:"protocol"` 598 Code uint64 `json:"code"` 599 Received bool `json:"received"` 600 } 601 602 // String returns a log-friendly string 603 func (self *Msg) String() string { 604 return fmt.Sprintf("Msg(%d) %v->%v", self.Code, self.One.TerminalString(), self.Other.TerminalString()) 605 } 606 607 // ConnLabel generates a deterministic string which represents a connection 608 // between two nodes, used to compare if two connections are between the same 609 // nodes 610 func ConnLabel(source, target discover.NodeID) string { 611 var first, second discover.NodeID 612 if bytes.Compare(source.Bytes(), target.Bytes()) > 0 { 613 first = target 614 second = source 615 } else { 616 first = source 617 second = target 618 } 619 return fmt.Sprintf("%v-%v", first, second) 620 } 621 622 // Snapshot represents the state of a network at a single point in time and can 623 // be used to restore the state of a network 624 type Snapshot struct { 625 Nodes []NodeSnapshot `json:"nodes,omitempty"` 626 Conns []Conn `json:"conns,omitempty"` 627 } 628 629 // NodeSnapshot represents the state of a node in the network 630 type NodeSnapshot struct { 631 Node Node `json:"node,omitempty"` 632 633 // Snapshots is arbitrary data gathered from calling node.Snapshots() 634 Snapshots map[string][]byte `json:"snapshots,omitempty"` 635 } 636 637 // Snapshot creates a network snapshot 638 func (self *Network) Snapshot() (*Snapshot, error) { 639 self.lock.Lock() 640 defer self.lock.Unlock() 641 snap := &Snapshot{ 642 Nodes: make([]NodeSnapshot, len(self.Nodes)), 643 Conns: make([]Conn, len(self.Conns)), 644 } 645 for i, node := range self.Nodes { 646 snap.Nodes[i] = NodeSnapshot{Node: *node} 647 if !node.Up { 648 continue 649 } 650 snapshots, err := node.Snapshots() 651 if err != nil { 652 return nil, err 653 } 654 snap.Nodes[i].Snapshots = snapshots 655 } 656 for i, conn := range self.Conns { 657 snap.Conns[i] = *conn 658 } 659 return snap, nil 660 } 661 662 // Load loads a network snapshot 663 func (self *Network) Load(snap *Snapshot) error { 664 for _, n := range snap.Nodes { 665 if _, err := self.NewNodeWithConfig(n.Node.Config); err != nil { 666 return err 667 } 668 if !n.Node.Up { 669 continue 670 } 671 if err := self.startWithSnapshots(n.Node.Config.ID, n.Snapshots); err != nil { 672 return err 673 } 674 } 675 for _, conn := range snap.Conns { 676 677 if !self.GetNode(conn.One).Up || !self.GetNode(conn.Other).Up { 678 //in this case, at least one of the nodes of a connection is not up, 679 //so it would result in the snapshot `Load` to fail 680 continue 681 } 682 if err := self.Connect(conn.One, conn.Other); err != nil { 683 return err 684 } 685 } 686 return nil 687 } 688 689 // Subscribe reads control events from a channel and executes them 690 func (self *Network) Subscribe(events chan *Event) { 691 for { 692 select { 693 case event, ok := <-events: 694 if !ok { 695 return 696 } 697 if event.Control { 698 self.executeControlEvent(event) 699 } 700 case <-self.quitc: 701 return 702 } 703 } 704 } 705 706 func (self *Network) executeControlEvent(event *Event) { 707 log.Trace("execute control event", "type", event.Type, "event", event) 708 switch event.Type { 709 case EventTypeNode: 710 if err := self.executeNodeEvent(event); err != nil { 711 log.Error("error executing node event", "event", event, "err", err) 712 } 713 case EventTypeConn: 714 if err := self.executeConnEvent(event); err != nil { 715 log.Error("error executing conn event", "event", event, "err", err) 716 } 717 case EventTypeMsg: 718 log.Warn("ignoring control msg event") 719 } 720 } 721 722 func (self *Network) executeNodeEvent(e *Event) error { 723 if !e.Node.Up { 724 return self.Stop(e.Node.ID()) 725 } 726 727 if _, err := self.NewNodeWithConfig(e.Node.Config); err != nil { 728 return err 729 } 730 return self.Start(e.Node.ID()) 731 } 732 733 func (self *Network) executeConnEvent(e *Event) error { 734 if e.Conn.Up { 735 return self.Connect(e.Conn.One, e.Conn.Other) 736 } else { 737 return self.Disconnect(e.Conn.One, e.Conn.Other) 738 } 739 }