github.com/shyftnetwork/go-empyrean@v1.8.3-0.20191127201940-fbfca9338f04/swarm/network/simulations/discovery/discovery_test.go (about) 1 // Copyright 2018 The go-ethereum Authors 2 // This file is part of the go-ethereum library. 3 // 4 // The go-ethereum library is free software: you can redistribute it and/or modify 5 // it under the terms of the GNU Lesser General Public License as published by 6 // the Free Software Foundation, either version 3 of the License, or 7 // (at your option) any later version. 8 // 9 // The go-ethereum library is distributed in the hope that it will be useful, 10 // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 // GNU Lesser General Public License for more details. 13 // 14 // You should have received a copy of the GNU Lesser General Public License 15 // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. 16 17 package discovery 18 19 import ( 20 "context" 21 "encoding/json" 22 "errors" 23 "flag" 24 "fmt" 25 "io/ioutil" 26 "math/rand" 27 "os" 28 "path" 29 "strings" 30 "sync" 31 "testing" 32 "time" 33 34 "github.com/ShyftNetwork/go-empyrean/common" 35 "github.com/ShyftNetwork/go-empyrean/log" 36 "github.com/ShyftNetwork/go-empyrean/node" 37 "github.com/ShyftNetwork/go-empyrean/p2p" 38 "github.com/ShyftNetwork/go-empyrean/p2p/enode" 39 "github.com/ShyftNetwork/go-empyrean/p2p/simulations" 40 "github.com/ShyftNetwork/go-empyrean/p2p/simulations/adapters" 41 "github.com/ShyftNetwork/go-empyrean/swarm/network" 42 "github.com/ShyftNetwork/go-empyrean/swarm/state" 43 colorable "github.com/mattn/go-colorable" 44 ) 45 46 // serviceName is used with the exec adapter so the exec'd binary knows which 47 // service to execute 48 const serviceName = "discovery" 49 const testNeighbourhoodSize = 2 50 const discoveryPersistenceDatadir = "discovery_persistence_test_store" 51 52 var discoveryPersistencePath = path.Join(os.TempDir(), discoveryPersistenceDatadir) 53 var discoveryEnabled = true 54 var persistenceEnabled = false 55 56 var services = adapters.Services{ 57 serviceName: newService, 58 } 59 60 func cleanDbStores() error { 61 entries, err := ioutil.ReadDir(os.TempDir()) 62 if err != nil { 63 return err 64 } 65 66 for _, f := range entries { 67 if strings.HasPrefix(f.Name(), discoveryPersistenceDatadir) { 68 os.RemoveAll(path.Join(os.TempDir(), f.Name())) 69 } 70 } 71 return nil 72 73 } 74 75 func getDbStore(nodeID string) (*state.DBStore, error) { 76 if _, err := os.Stat(discoveryPersistencePath + "_" + nodeID); os.IsNotExist(err) { 77 log.Info(fmt.Sprintf("directory for nodeID %s does not exist. creating...", nodeID)) 78 ioutil.TempDir("", discoveryPersistencePath+"_"+nodeID) 79 } 80 log.Info(fmt.Sprintf("opening storage directory for nodeID %s", nodeID)) 81 store, err := state.NewDBStore(discoveryPersistencePath + "_" + nodeID) 82 if err != nil { 83 return nil, err 84 } 85 return store, nil 86 } 87 88 var ( 89 nodeCount = flag.Int("nodes", 10, "number of nodes to create (default 10)") 90 initCount = flag.Int("conns", 1, "number of originally connected peers (default 1)") 91 snapshotFile = flag.String("snapshot", "", "path to create snapshot file in") 92 loglevel = flag.Int("loglevel", 3, "verbosity of logs") 93 rawlog = flag.Bool("rawlog", false, "remove terminal formatting from logs") 94 serviceOverride = flag.String("services", "", "remove or add services to the node snapshot; prefix with \"+\" to add, \"-\" to remove; example: +pss,-discovery") 95 ) 96 97 func init() { 98 flag.Parse() 99 // register the discovery service which will run as a devp2p 100 // protocol when using the exec adapter 101 adapters.RegisterServices(services) 102 103 log.PrintOrigins(true) 104 log.Root().SetHandler(log.LvlFilterHandler(log.Lvl(*loglevel), log.StreamHandler(colorable.NewColorableStderr(), log.TerminalFormat(!*rawlog)))) 105 } 106 107 // Benchmarks to test the average time it takes for an N-node ring 108 // to full a healthy kademlia topology 109 func BenchmarkDiscovery_8_1(b *testing.B) { benchmarkDiscovery(b, 8, 1) } 110 func BenchmarkDiscovery_16_1(b *testing.B) { benchmarkDiscovery(b, 16, 1) } 111 func BenchmarkDiscovery_32_1(b *testing.B) { benchmarkDiscovery(b, 32, 1) } 112 func BenchmarkDiscovery_64_1(b *testing.B) { benchmarkDiscovery(b, 64, 1) } 113 func BenchmarkDiscovery_128_1(b *testing.B) { benchmarkDiscovery(b, 128, 1) } 114 func BenchmarkDiscovery_256_1(b *testing.B) { benchmarkDiscovery(b, 256, 1) } 115 116 func BenchmarkDiscovery_8_2(b *testing.B) { benchmarkDiscovery(b, 8, 2) } 117 func BenchmarkDiscovery_16_2(b *testing.B) { benchmarkDiscovery(b, 16, 2) } 118 func BenchmarkDiscovery_32_2(b *testing.B) { benchmarkDiscovery(b, 32, 2) } 119 func BenchmarkDiscovery_64_2(b *testing.B) { benchmarkDiscovery(b, 64, 2) } 120 func BenchmarkDiscovery_128_2(b *testing.B) { benchmarkDiscovery(b, 128, 2) } 121 func BenchmarkDiscovery_256_2(b *testing.B) { benchmarkDiscovery(b, 256, 2) } 122 123 func BenchmarkDiscovery_8_4(b *testing.B) { benchmarkDiscovery(b, 8, 4) } 124 func BenchmarkDiscovery_16_4(b *testing.B) { benchmarkDiscovery(b, 16, 4) } 125 func BenchmarkDiscovery_32_4(b *testing.B) { benchmarkDiscovery(b, 32, 4) } 126 func BenchmarkDiscovery_64_4(b *testing.B) { benchmarkDiscovery(b, 64, 4) } 127 func BenchmarkDiscovery_128_4(b *testing.B) { benchmarkDiscovery(b, 128, 4) } 128 func BenchmarkDiscovery_256_4(b *testing.B) { benchmarkDiscovery(b, 256, 4) } 129 130 func TestDiscoverySimulationExecAdapter(t *testing.T) { 131 testDiscoverySimulationExecAdapter(t, *nodeCount, *initCount) 132 } 133 134 func testDiscoverySimulationExecAdapter(t *testing.T, nodes, conns int) { 135 baseDir, err := ioutil.TempDir("", "swarm-test") 136 if err != nil { 137 t.Fatal(err) 138 } 139 defer os.RemoveAll(baseDir) 140 testDiscoverySimulation(t, nodes, conns, adapters.NewExecAdapter(baseDir)) 141 } 142 143 func TestDiscoverySimulationSimAdapter(t *testing.T) { 144 testDiscoverySimulationSimAdapter(t, *nodeCount, *initCount) 145 } 146 147 func TestDiscoveryPersistenceSimulationSimAdapter(t *testing.T) { 148 testDiscoveryPersistenceSimulationSimAdapter(t, *nodeCount, *initCount) 149 } 150 151 func testDiscoveryPersistenceSimulationSimAdapter(t *testing.T, nodes, conns int) { 152 testDiscoveryPersistenceSimulation(t, nodes, conns, adapters.NewSimAdapter(services)) 153 } 154 155 func testDiscoverySimulationSimAdapter(t *testing.T, nodes, conns int) { 156 testDiscoverySimulation(t, nodes, conns, adapters.NewSimAdapter(services)) 157 } 158 159 func testDiscoverySimulation(t *testing.T, nodes, conns int, adapter adapters.NodeAdapter) { 160 t.Skip("discovery tests depend on suggestpeer, which is unreliable after kademlia depth change.") 161 startedAt := time.Now() 162 result, err := discoverySimulation(nodes, conns, adapter) 163 if err != nil { 164 t.Fatalf("Setting up simulation failed: %v", err) 165 } 166 if result.Error != nil { 167 t.Fatalf("Simulation failed: %s", result.Error) 168 } 169 t.Logf("Simulation with %d nodes passed in %s", nodes, result.FinishedAt.Sub(result.StartedAt)) 170 var min, max time.Duration 171 var sum int 172 for _, pass := range result.Passes { 173 duration := pass.Sub(result.StartedAt) 174 if sum == 0 || duration < min { 175 min = duration 176 } 177 if duration > max { 178 max = duration 179 } 180 sum += int(duration.Nanoseconds()) 181 } 182 t.Logf("Min: %s, Max: %s, Average: %s", min, max, time.Duration(sum/len(result.Passes))*time.Nanosecond) 183 finishedAt := time.Now() 184 t.Logf("Setup: %s, shutdown: %s", result.StartedAt.Sub(startedAt), finishedAt.Sub(result.FinishedAt)) 185 } 186 187 func testDiscoveryPersistenceSimulation(t *testing.T, nodes, conns int, adapter adapters.NodeAdapter) map[int][]byte { 188 t.Skip("discovery tests depend on suggestpeer, which is unreliable after kademlia depth change.") 189 persistenceEnabled = true 190 discoveryEnabled = true 191 192 result, err := discoveryPersistenceSimulation(nodes, conns, adapter) 193 194 if err != nil { 195 t.Fatalf("Setting up simulation failed: %v", err) 196 } 197 if result.Error != nil { 198 t.Fatalf("Simulation failed: %s", result.Error) 199 } 200 t.Logf("Simulation with %d nodes passed in %s", nodes, result.FinishedAt.Sub(result.StartedAt)) 201 // set the discovery and persistence flags again to default so other 202 // tests will not be affected 203 discoveryEnabled = true 204 persistenceEnabled = false 205 return nil 206 } 207 208 func benchmarkDiscovery(b *testing.B, nodes, conns int) { 209 for i := 0; i < b.N; i++ { 210 result, err := discoverySimulation(nodes, conns, adapters.NewSimAdapter(services)) 211 if err != nil { 212 b.Fatalf("setting up simulation failed: %v", err) 213 } 214 if result.Error != nil { 215 b.Logf("simulation failed: %s", result.Error) 216 } 217 } 218 } 219 220 func discoverySimulation(nodes, conns int, adapter adapters.NodeAdapter) (*simulations.StepResult, error) { 221 // create network 222 net := simulations.NewNetwork(adapter, &simulations.NetworkConfig{ 223 ID: "0", 224 DefaultService: serviceName, 225 }) 226 defer net.Shutdown() 227 trigger := make(chan enode.ID) 228 ids := make([]enode.ID, nodes) 229 for i := 0; i < nodes; i++ { 230 conf := adapters.RandomNodeConfig() 231 node, err := net.NewNodeWithConfig(conf) 232 if err != nil { 233 return nil, fmt.Errorf("error starting node: %s", err) 234 } 235 if err := net.Start(node.ID()); err != nil { 236 return nil, fmt.Errorf("error starting node %s: %s", node.ID().TerminalString(), err) 237 } 238 if err := triggerChecks(trigger, net, node.ID()); err != nil { 239 return nil, fmt.Errorf("error triggering checks for node %s: %s", node.ID().TerminalString(), err) 240 } 241 ids[i] = node.ID() 242 } 243 244 // run a simulation which connects the 10 nodes in a ring and waits 245 // for full peer discovery 246 var addrs [][]byte 247 action := func(ctx context.Context) error { 248 return nil 249 } 250 wg := sync.WaitGroup{} 251 for i := range ids { 252 // collect the overlay addresses, to 253 addrs = append(addrs, ids[i].Bytes()) 254 for j := 0; j < conns; j++ { 255 var k int 256 if j == 0 { 257 k = (i + 1) % len(ids) 258 } else { 259 k = rand.Intn(len(ids)) 260 } 261 wg.Add(1) 262 go func(i, k int) { 263 defer wg.Done() 264 net.Connect(ids[i], ids[k]) 265 }(i, k) 266 } 267 } 268 wg.Wait() 269 log.Debug(fmt.Sprintf("nodes: %v", len(addrs))) 270 // construct the peer pot, so that kademlia health can be checked 271 ppmap := network.NewPeerPotMap(network.NewKadParams().NeighbourhoodSize, addrs) 272 check := func(ctx context.Context, id enode.ID) (bool, error) { 273 select { 274 case <-ctx.Done(): 275 return false, ctx.Err() 276 default: 277 } 278 279 node := net.GetNode(id) 280 if node == nil { 281 return false, fmt.Errorf("unknown node: %s", id) 282 } 283 client, err := node.Client() 284 if err != nil { 285 return false, fmt.Errorf("error getting node client: %s", err) 286 } 287 288 healthy := &network.Health{} 289 if err := client.Call(&healthy, "hive_healthy", ppmap); err != nil { 290 return false, fmt.Errorf("error getting node health: %s", err) 291 } 292 log.Info(fmt.Sprintf("node %4s healthy: connected nearest neighbours: %v, know nearest neighbours: %v,\n\n%v", id, healthy.ConnectNN, healthy.KnowNN, healthy.Hive)) 293 return healthy.KnowNN && healthy.ConnectNN, nil 294 } 295 296 // 64 nodes ~ 1min 297 // 128 nodes ~ 298 timeout := 300 * time.Second 299 ctx, cancel := context.WithTimeout(context.Background(), timeout) 300 defer cancel() 301 result := simulations.NewSimulation(net).Run(ctx, &simulations.Step{ 302 Action: action, 303 Trigger: trigger, 304 Expect: &simulations.Expectation{ 305 Nodes: ids, 306 Check: check, 307 }, 308 }) 309 if result.Error != nil { 310 return result, nil 311 } 312 313 if *snapshotFile != "" { 314 var err error 315 var snap *simulations.Snapshot 316 if len(*serviceOverride) > 0 { 317 var addServices []string 318 var removeServices []string 319 for _, osvc := range strings.Split(*serviceOverride, ",") { 320 if strings.Index(osvc, "+") == 0 { 321 addServices = append(addServices, osvc[1:]) 322 } else if strings.Index(osvc, "-") == 0 { 323 removeServices = append(removeServices, osvc[1:]) 324 } else { 325 panic("stick to the rules, you know what they are") 326 } 327 } 328 snap, err = net.SnapshotWithServices(addServices, removeServices) 329 } else { 330 snap, err = net.Snapshot() 331 } 332 333 if err != nil { 334 return nil, errors.New("no shapshot dude") 335 } 336 jsonsnapshot, err := json.Marshal(snap) 337 if err != nil { 338 return nil, fmt.Errorf("corrupt json snapshot: %v", err) 339 } 340 log.Info("writing snapshot", "file", *snapshotFile) 341 err = ioutil.WriteFile(*snapshotFile, jsonsnapshot, 0755) 342 if err != nil { 343 return nil, err 344 } 345 } 346 return result, nil 347 } 348 349 func discoveryPersistenceSimulation(nodes, conns int, adapter adapters.NodeAdapter) (*simulations.StepResult, error) { 350 cleanDbStores() 351 defer cleanDbStores() 352 353 // create network 354 net := simulations.NewNetwork(adapter, &simulations.NetworkConfig{ 355 ID: "0", 356 DefaultService: serviceName, 357 }) 358 defer net.Shutdown() 359 trigger := make(chan enode.ID) 360 ids := make([]enode.ID, nodes) 361 var addrs [][]byte 362 363 for i := 0; i < nodes; i++ { 364 conf := adapters.RandomNodeConfig() 365 node, err := net.NewNodeWithConfig(conf) 366 if err != nil { 367 panic(err) 368 } 369 if err != nil { 370 return nil, fmt.Errorf("error starting node: %s", err) 371 } 372 if err := net.Start(node.ID()); err != nil { 373 return nil, fmt.Errorf("error starting node %s: %s", node.ID().TerminalString(), err) 374 } 375 if err := triggerChecks(trigger, net, node.ID()); err != nil { 376 return nil, fmt.Errorf("error triggering checks for node %s: %s", node.ID().TerminalString(), err) 377 } 378 // TODO we shouldn't be equating underaddr and overaddr like this, as they are not the same in production 379 ids[i] = node.ID() 380 a := ids[i].Bytes() 381 382 addrs = append(addrs, a) 383 } 384 385 // run a simulation which connects the 10 nodes in a ring and waits 386 // for full peer discovery 387 388 var restartTime time.Time 389 390 action := func(ctx context.Context) error { 391 ticker := time.NewTicker(500 * time.Millisecond) 392 393 for range ticker.C { 394 isHealthy := true 395 for _, id := range ids { 396 //call Healthy RPC 397 node := net.GetNode(id) 398 if node == nil { 399 return fmt.Errorf("unknown node: %s", id) 400 } 401 client, err := node.Client() 402 if err != nil { 403 return fmt.Errorf("error getting node client: %s", err) 404 } 405 healthy := &network.Health{} 406 addr := id.String() 407 ppmap := network.NewPeerPotMap(network.NewKadParams().NeighbourhoodSize, addrs) 408 if err := client.Call(&healthy, "hive_healthy", ppmap); err != nil { 409 return fmt.Errorf("error getting node health: %s", err) 410 } 411 412 log.Info(fmt.Sprintf("NODE: %s, IS HEALTHY: %t", addr, healthy.ConnectNN && healthy.KnowNN && healthy.CountKnowNN > 0)) 413 var nodeStr string 414 if err := client.Call(&nodeStr, "hive_string"); err != nil { 415 return fmt.Errorf("error getting node string %s", err) 416 } 417 log.Info(nodeStr) 418 for _, a := range addrs { 419 log.Info(common.Bytes2Hex(a)) 420 } 421 if !healthy.ConnectNN || healthy.CountKnowNN == 0 { 422 isHealthy = false 423 break 424 } 425 } 426 if isHealthy { 427 break 428 } 429 } 430 ticker.Stop() 431 432 log.Info("reached healthy kademlia. starting to shutdown nodes.") 433 shutdownStarted := time.Now() 434 // stop all ids, then start them again 435 for _, id := range ids { 436 node := net.GetNode(id) 437 438 if err := net.Stop(node.ID()); err != nil { 439 return fmt.Errorf("error stopping node %s: %s", node.ID().TerminalString(), err) 440 } 441 } 442 log.Info(fmt.Sprintf("shutting down nodes took: %s", time.Since(shutdownStarted))) 443 persistenceEnabled = true 444 discoveryEnabled = false 445 restartTime = time.Now() 446 for _, id := range ids { 447 node := net.GetNode(id) 448 if err := net.Start(node.ID()); err != nil { 449 return fmt.Errorf("error starting node %s: %s", node.ID().TerminalString(), err) 450 } 451 if err := triggerChecks(trigger, net, node.ID()); err != nil { 452 return fmt.Errorf("error triggering checks for node %s: %s", node.ID().TerminalString(), err) 453 } 454 } 455 456 log.Info(fmt.Sprintf("restarting nodes took: %s", time.Since(restartTime))) 457 458 return nil 459 } 460 //connects in a chain 461 wg := sync.WaitGroup{} 462 //connects in a ring 463 for i := range ids { 464 for j := 1; j <= conns; j++ { 465 k := (i + j) % len(ids) 466 if k == i { 467 k = (k + 1) % len(ids) 468 } 469 wg.Add(1) 470 go func(i, k int) { 471 defer wg.Done() 472 net.Connect(ids[i], ids[k]) 473 }(i, k) 474 } 475 } 476 wg.Wait() 477 log.Debug(fmt.Sprintf("nodes: %v", len(addrs))) 478 // construct the peer pot, so that kademlia health can be checked 479 check := func(ctx context.Context, id enode.ID) (bool, error) { 480 select { 481 case <-ctx.Done(): 482 return false, ctx.Err() 483 default: 484 } 485 486 node := net.GetNode(id) 487 if node == nil { 488 return false, fmt.Errorf("unknown node: %s", id) 489 } 490 client, err := node.Client() 491 if err != nil { 492 return false, fmt.Errorf("error getting node client: %s", err) 493 } 494 healthy := &network.Health{} 495 ppmap := network.NewPeerPotMap(network.NewKadParams().NeighbourhoodSize, addrs) 496 497 if err := client.Call(&healthy, "hive_healthy", ppmap); err != nil { 498 return false, fmt.Errorf("error getting node health: %s", err) 499 } 500 log.Info(fmt.Sprintf("node %4s healthy: got nearest neighbours: %v, know nearest neighbours: %v", id, healthy.ConnectNN, healthy.KnowNN)) 501 502 return healthy.KnowNN && healthy.ConnectNN, nil 503 } 504 505 // 64 nodes ~ 1min 506 // 128 nodes ~ 507 timeout := 300 * time.Second 508 ctx, cancel := context.WithTimeout(context.Background(), timeout) 509 defer cancel() 510 result := simulations.NewSimulation(net).Run(ctx, &simulations.Step{ 511 Action: action, 512 Trigger: trigger, 513 Expect: &simulations.Expectation{ 514 Nodes: ids, 515 Check: check, 516 }, 517 }) 518 if result.Error != nil { 519 return result, nil 520 } 521 522 return result, nil 523 } 524 525 // triggerChecks triggers a simulation step check whenever a peer is added or 526 // removed from the given node, and also every second to avoid a race between 527 // peer events and kademlia becoming healthy 528 func triggerChecks(trigger chan enode.ID, net *simulations.Network, id enode.ID) error { 529 node := net.GetNode(id) 530 if node == nil { 531 return fmt.Errorf("unknown node: %s", id) 532 } 533 client, err := node.Client() 534 if err != nil { 535 return err 536 } 537 events := make(chan *p2p.PeerEvent) 538 sub, err := client.Subscribe(context.Background(), "admin", events, "peerEvents") 539 if err != nil { 540 return fmt.Errorf("error getting peer events for node %v: %s", id, err) 541 } 542 go func() { 543 defer sub.Unsubscribe() 544 545 tick := time.NewTicker(time.Second) 546 defer tick.Stop() 547 548 for { 549 select { 550 case <-events: 551 trigger <- id 552 case <-tick.C: 553 trigger <- id 554 case err := <-sub.Err(): 555 if err != nil { 556 log.Error(fmt.Sprintf("error getting peer events for node %v", id), "err", err) 557 } 558 return 559 } 560 } 561 }() 562 return nil 563 } 564 565 func newService(ctx *adapters.ServiceContext) (node.Service, error) { 566 addr := network.NewAddr(ctx.Config.Node()) 567 568 kp := network.NewKadParams() 569 kp.NeighbourhoodSize = testNeighbourhoodSize 570 571 if ctx.Config.Reachable != nil { 572 kp.Reachable = func(o *network.BzzAddr) bool { 573 return ctx.Config.Reachable(o.ID()) 574 } 575 } 576 kad := network.NewKademlia(addr.Over(), kp) 577 hp := network.NewHiveParams() 578 hp.KeepAliveInterval = time.Duration(200) * time.Millisecond 579 hp.Discovery = discoveryEnabled 580 581 log.Info(fmt.Sprintf("discovery for nodeID %s is %t", ctx.Config.ID.String(), hp.Discovery)) 582 583 config := &network.BzzConfig{ 584 OverlayAddr: addr.Over(), 585 UnderlayAddr: addr.Under(), 586 HiveParams: hp, 587 } 588 589 if persistenceEnabled { 590 log.Info(fmt.Sprintf("persistence enabled for nodeID %s", ctx.Config.ID.String())) 591 store, err := getDbStore(ctx.Config.ID.String()) 592 if err != nil { 593 return nil, err 594 } 595 return network.NewBzz(config, kad, store, nil, nil), nil 596 } 597 598 return network.NewBzz(config, kad, nil, nil, nil), nil 599 }