github.com/etsc3259/etsc@v0.0.0-20190109113336-a9c2c10f9c95/swarm/network/simulations/discovery/discovery_test.go (about) 1 // Copyright 2018 The go-etsc Authors 2 // This file is part of the go-etsc library. 3 // 4 // The go-etsc library is free software: you can redistribute it and/or modify 5 // it under the terms of the GNU Lesser General Public License as published by 6 // the Free Software Foundation, either version 3 of the License, or 7 // (at your option) any later version. 8 // 9 // The go-etsc library is distributed in the hope that it will be useful, 10 // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 // GNU Lesser General Public License for more details. 13 // 14 // You should have received a copy of the GNU Lesser General Public License 15 // along with the go-etsc library. If not, see <http://www.gnu.org/licenses/>. 16 17 package discovery 18 19 import ( 20 "context" 21 "encoding/json" 22 "errors" 23 "flag" 24 "fmt" 25 "io/ioutil" 26 "math/rand" 27 "os" 28 "path" 29 "strings" 30 "sync" 31 "testing" 32 "time" 33 34 "github.com/ETSC3259/etsc/log" 35 "github.com/ETSC3259/etsc/node" 36 "github.com/ETSC3259/etsc/p2p" 37 "github.com/ETSC3259/etsc/p2p/enode" 38 "github.com/ETSC3259/etsc/p2p/simulations" 39 "github.com/ETSC3259/etsc/p2p/simulations/adapters" 40 "github.com/ETSC3259/etsc/swarm/network" 41 "github.com/ETSC3259/etsc/swarm/state" 42 colorable "github.com/mattn/go-colorable" 43 ) 44 45 // serviceName is used with the exec adapter so the exec'd binary knows which 46 // service to execute 47 const serviceName = "discovery" 48 const testMinProxBinSize = 2 49 const discoveryPersistenceDatadir = "discovery_persistence_test_store" 50 51 var discoveryPersistencePath = path.Join(os.TempDir(), discoveryPersistenceDatadir) 52 var discoveryEnabled = true 53 var persistenceEnabled = false 54 55 var services = adapters.Services{ 56 serviceName: newService, 57 } 58 59 func cleanDbStores() error { 60 entries, err := ioutil.ReadDir(os.TempDir()) 61 if err != nil { 62 return err 63 } 64 65 for _, f := range entries { 66 if strings.HasPrefix(f.Name(), discoveryPersistenceDatadir) { 67 os.RemoveAll(path.Join(os.TempDir(), f.Name())) 68 } 69 } 70 return nil 71 72 } 73 74 func getDbStore(nodeID string) (*state.DBStore, error) { 75 if _, err := os.Stat(discoveryPersistencePath + "_" + nodeID); os.IsNotExist(err) { 76 log.Info(fmt.Sprintf("directory for nodeID %s does not exist. creating...", nodeID)) 77 ioutil.TempDir("", discoveryPersistencePath+"_"+nodeID) 78 } 79 log.Info(fmt.Sprintf("opening storage directory for nodeID %s", nodeID)) 80 store, err := state.NewDBStore(discoveryPersistencePath + "_" + nodeID) 81 if err != nil { 82 return nil, err 83 } 84 return store, nil 85 } 86 87 var ( 88 nodeCount = flag.Int("nodes", 10, "number of nodes to create (default 10)") 89 initCount = flag.Int("conns", 1, "number of originally connected peers (default 1)") 90 snapshotFile = flag.String("snapshot", "", "path to create snapshot file in") 91 loglevel = flag.Int("loglevel", 3, "verbosity of logs") 92 rawlog = flag.Bool("rawlog", false, "remove terminal formatting from logs") 93 serviceOverride = flag.String("services", "", "remove or add services to the node snapshot; prefix with \"+\" to add, \"-\" to remove; example: +pss,-discovery") 94 ) 95 96 func init() { 97 flag.Parse() 98 // register the discovery service which will run as a devp2p 99 // protocol when using the exec adapter 100 adapters.RegisterServices(services) 101 102 log.PrintOrigins(true) 103 log.Root().SetHandler(log.LvlFilterHandler(log.Lvl(*loglevel), log.StreamHandler(colorable.NewColorableStderr(), log.TerminalFormat(!*rawlog)))) 104 } 105 106 // Benchmarks to test the average time it takes for an N-node ring 107 // to full a healthy kademlia topology 108 func BenchmarkDiscovery_8_1(b *testing.B) { benchmarkDiscovery(b, 8, 1) } 109 func BenchmarkDiscovery_16_1(b *testing.B) { benchmarkDiscovery(b, 16, 1) } 110 func BenchmarkDiscovery_32_1(b *testing.B) { benchmarkDiscovery(b, 32, 1) } 111 func BenchmarkDiscovery_64_1(b *testing.B) { benchmarkDiscovery(b, 64, 1) } 112 func BenchmarkDiscovery_128_1(b *testing.B) { benchmarkDiscovery(b, 128, 1) } 113 func BenchmarkDiscovery_256_1(b *testing.B) { benchmarkDiscovery(b, 256, 1) } 114 115 func BenchmarkDiscovery_8_2(b *testing.B) { benchmarkDiscovery(b, 8, 2) } 116 func BenchmarkDiscovery_16_2(b *testing.B) { benchmarkDiscovery(b, 16, 2) } 117 func BenchmarkDiscovery_32_2(b *testing.B) { benchmarkDiscovery(b, 32, 2) } 118 func BenchmarkDiscovery_64_2(b *testing.B) { benchmarkDiscovery(b, 64, 2) } 119 func BenchmarkDiscovery_128_2(b *testing.B) { benchmarkDiscovery(b, 128, 2) } 120 func BenchmarkDiscovery_256_2(b *testing.B) { benchmarkDiscovery(b, 256, 2) } 121 122 func BenchmarkDiscovery_8_4(b *testing.B) { benchmarkDiscovery(b, 8, 4) } 123 func BenchmarkDiscovery_16_4(b *testing.B) { benchmarkDiscovery(b, 16, 4) } 124 func BenchmarkDiscovery_32_4(b *testing.B) { benchmarkDiscovery(b, 32, 4) } 125 func BenchmarkDiscovery_64_4(b *testing.B) { benchmarkDiscovery(b, 64, 4) } 126 func BenchmarkDiscovery_128_4(b *testing.B) { benchmarkDiscovery(b, 128, 4) } 127 func BenchmarkDiscovery_256_4(b *testing.B) { benchmarkDiscovery(b, 256, 4) } 128 129 func TestDiscoverySimulationExecAdapter(t *testing.T) { 130 testDiscoverySimulationExecAdapter(t, *nodeCount, *initCount) 131 } 132 133 func testDiscoverySimulationExecAdapter(t *testing.T, nodes, conns int) { 134 baseDir, err := ioutil.TempDir("", "swarm-test") 135 if err != nil { 136 t.Fatal(err) 137 } 138 defer os.RemoveAll(baseDir) 139 testDiscoverySimulation(t, nodes, conns, adapters.NewExecAdapter(baseDir)) 140 } 141 142 func TestDiscoverySimulationSimAdapter(t *testing.T) { 143 testDiscoverySimulationSimAdapter(t, *nodeCount, *initCount) 144 } 145 146 func TestDiscoveryPersistenceSimulationSimAdapter(t *testing.T) { 147 testDiscoveryPersistenceSimulationSimAdapter(t, *nodeCount, *initCount) 148 } 149 150 func testDiscoveryPersistenceSimulationSimAdapter(t *testing.T, nodes, conns int) { 151 testDiscoveryPersistenceSimulation(t, nodes, conns, adapters.NewSimAdapter(services)) 152 } 153 154 func testDiscoverySimulationSimAdapter(t *testing.T, nodes, conns int) { 155 testDiscoverySimulation(t, nodes, conns, adapters.NewSimAdapter(services)) 156 } 157 158 func testDiscoverySimulation(t *testing.T, nodes, conns int, adapter adapters.NodeAdapter) { 159 startedAt := time.Now() 160 result, err := discoverySimulation(nodes, conns, adapter) 161 if err != nil { 162 t.Fatalf("Setting up simulation failed: %v", err) 163 } 164 if result.Error != nil { 165 t.Fatalf("Simulation failed: %s", result.Error) 166 } 167 t.Logf("Simulation with %d nodes passed in %s", nodes, result.FinishedAt.Sub(result.StartedAt)) 168 var min, max time.Duration 169 var sum int 170 for _, pass := range result.Passes { 171 duration := pass.Sub(result.StartedAt) 172 if sum == 0 || duration < min { 173 min = duration 174 } 175 if duration > max { 176 max = duration 177 } 178 sum += int(duration.Nanoseconds()) 179 } 180 t.Logf("Min: %s, Max: %s, Average: %s", min, max, time.Duration(sum/len(result.Passes))*time.Nanosecond) 181 finishedAt := time.Now() 182 t.Logf("Setup: %s, shutdown: %s", result.StartedAt.Sub(startedAt), finishedAt.Sub(result.FinishedAt)) 183 } 184 185 func testDiscoveryPersistenceSimulation(t *testing.T, nodes, conns int, adapter adapters.NodeAdapter) map[int][]byte { 186 persistenceEnabled = true 187 discoveryEnabled = true 188 189 result, err := discoveryPersistenceSimulation(nodes, conns, adapter) 190 191 if err != nil { 192 t.Fatalf("Setting up simulation failed: %v", err) 193 } 194 if result.Error != nil { 195 t.Fatalf("Simulation failed: %s", result.Error) 196 } 197 t.Logf("Simulation with %d nodes passed in %s", nodes, result.FinishedAt.Sub(result.StartedAt)) 198 // set the discovery and persistence flags again to default so other 199 // tests will not be affected 200 discoveryEnabled = true 201 persistenceEnabled = false 202 return nil 203 } 204 205 func benchmarkDiscovery(b *testing.B, nodes, conns int) { 206 for i := 0; i < b.N; i++ { 207 result, err := discoverySimulation(nodes, conns, adapters.NewSimAdapter(services)) 208 if err != nil { 209 b.Fatalf("setting up simulation failed: %v", err) 210 } 211 if result.Error != nil { 212 b.Logf("simulation failed: %s", result.Error) 213 } 214 } 215 } 216 217 func discoverySimulation(nodes, conns int, adapter adapters.NodeAdapter) (*simulations.StepResult, error) { 218 // create network 219 net := simulations.NewNetwork(adapter, &simulations.NetworkConfig{ 220 ID: "0", 221 DefaultService: serviceName, 222 }) 223 defer net.Shutdown() 224 trigger := make(chan enode.ID) 225 ids := make([]enode.ID, nodes) 226 for i := 0; i < nodes; i++ { 227 conf := adapters.RandomNodeConfig() 228 node, err := net.NewNodeWithConfig(conf) 229 if err != nil { 230 return nil, fmt.Errorf("error starting node: %s", err) 231 } 232 if err := net.Start(node.ID()); err != nil { 233 return nil, fmt.Errorf("error starting node %s: %s", node.ID().TerminalString(), err) 234 } 235 if err := triggerChecks(trigger, net, node.ID()); err != nil { 236 return nil, fmt.Errorf("error triggering checks for node %s: %s", node.ID().TerminalString(), err) 237 } 238 ids[i] = node.ID() 239 } 240 241 // run a simulation which connects the 10 nodes in a ring and waits 242 // for full peer discovery 243 var addrs [][]byte 244 action := func(ctx context.Context) error { 245 return nil 246 } 247 wg := sync.WaitGroup{} 248 for i := range ids { 249 // collect the overlay addresses, to 250 addrs = append(addrs, ids[i].Bytes()) 251 for j := 0; j < conns; j++ { 252 var k int 253 if j == 0 { 254 k = (i + 1) % len(ids) 255 } else { 256 k = rand.Intn(len(ids)) 257 } 258 wg.Add(1) 259 go func(i, k int) { 260 defer wg.Done() 261 net.Connect(ids[i], ids[k]) 262 }(i, k) 263 } 264 } 265 wg.Wait() 266 log.Debug(fmt.Sprintf("nodes: %v", len(addrs))) 267 // construct the peer pot, so that kademlia health can be checked 268 ppmap := network.NewPeerPotMap(testMinProxBinSize, addrs) 269 check := func(ctx context.Context, id enode.ID) (bool, error) { 270 select { 271 case <-ctx.Done(): 272 return false, ctx.Err() 273 default: 274 } 275 276 node := net.GetNode(id) 277 if node == nil { 278 return false, fmt.Errorf("unknown node: %s", id) 279 } 280 client, err := node.Client() 281 if err != nil { 282 return false, fmt.Errorf("error getting node client: %s", err) 283 } 284 healthy := &network.Health{} 285 if err := client.Call(&healthy, "hive_healthy", ppmap[id.String()]); err != nil { 286 return false, fmt.Errorf("error getting node health: %s", err) 287 } 288 log.Debug(fmt.Sprintf("node %4s healthy: got nearest neighbours: %v, know nearest neighbours: %v, saturated: %v\n%v", id, healthy.GotNN, healthy.KnowNN, healthy.Full, healthy.Hive)) 289 return healthy.KnowNN && healthy.GotNN && healthy.Full, nil 290 } 291 292 // 64 nodes ~ 1min 293 // 128 nodes ~ 294 timeout := 300 * time.Second 295 ctx, cancel := context.WithTimeout(context.Background(), timeout) 296 defer cancel() 297 result := simulations.NewSimulation(net).Run(ctx, &simulations.Step{ 298 Action: action, 299 Trigger: trigger, 300 Expect: &simulations.Expectation{ 301 Nodes: ids, 302 Check: check, 303 }, 304 }) 305 if result.Error != nil { 306 return result, nil 307 } 308 309 if *snapshotFile != "" { 310 var err error 311 var snap *simulations.Snapshot 312 if len(*serviceOverride) > 0 { 313 var addServices []string 314 var removeServices []string 315 for _, osvc := range strings.Split(*serviceOverride, ",") { 316 if strings.Index(osvc, "+") == 0 { 317 addServices = append(addServices, osvc[1:]) 318 } else if strings.Index(osvc, "-") == 0 { 319 removeServices = append(removeServices, osvc[1:]) 320 } else { 321 panic("stick to the rules, you know what they are") 322 } 323 } 324 snap, err = net.SnapshotWithServices(addServices, removeServices) 325 } else { 326 snap, err = net.Snapshot() 327 } 328 329 if err != nil { 330 return nil, errors.New("no shapshot dude") 331 } 332 jsonsnapshot, err := json.Marshal(snap) 333 if err != nil { 334 return nil, fmt.Errorf("corrupt json snapshot: %v", err) 335 } 336 log.Info("writing snapshot", "file", *snapshotFile) 337 err = ioutil.WriteFile(*snapshotFile, jsonsnapshot, 0755) 338 if err != nil { 339 return nil, err 340 } 341 } 342 return result, nil 343 } 344 345 func discoveryPersistenceSimulation(nodes, conns int, adapter adapters.NodeAdapter) (*simulations.StepResult, error) { 346 cleanDbStores() 347 defer cleanDbStores() 348 349 // create network 350 net := simulations.NewNetwork(adapter, &simulations.NetworkConfig{ 351 ID: "0", 352 DefaultService: serviceName, 353 }) 354 defer net.Shutdown() 355 trigger := make(chan enode.ID) 356 ids := make([]enode.ID, nodes) 357 var addrs [][]byte 358 359 for i := 0; i < nodes; i++ { 360 conf := adapters.RandomNodeConfig() 361 node, err := net.NewNodeWithConfig(conf) 362 if err != nil { 363 panic(err) 364 } 365 if err != nil { 366 return nil, fmt.Errorf("error starting node: %s", err) 367 } 368 if err := net.Start(node.ID()); err != nil { 369 return nil, fmt.Errorf("error starting node %s: %s", node.ID().TerminalString(), err) 370 } 371 if err := triggerChecks(trigger, net, node.ID()); err != nil { 372 return nil, fmt.Errorf("error triggering checks for node %s: %s", node.ID().TerminalString(), err) 373 } 374 ids[i] = node.ID() 375 a := ids[i].Bytes() 376 377 addrs = append(addrs, a) 378 } 379 380 // run a simulation which connects the 10 nodes in a ring and waits 381 // for full peer discovery 382 ppmap := network.NewPeerPotMap(testMinProxBinSize, addrs) 383 384 var restartTime time.Time 385 386 action := func(ctx context.Context) error { 387 ticker := time.NewTicker(500 * time.Millisecond) 388 389 for range ticker.C { 390 isHealthy := true 391 for _, id := range ids { 392 //call Healthy RPC 393 node := net.GetNode(id) 394 if node == nil { 395 return fmt.Errorf("unknown node: %s", id) 396 } 397 client, err := node.Client() 398 if err != nil { 399 return fmt.Errorf("error getting node client: %s", err) 400 } 401 healthy := &network.Health{} 402 addr := id.String() 403 if err := client.Call(&healthy, "hive_healthy", ppmap[addr]); err != nil { 404 return fmt.Errorf("error getting node health: %s", err) 405 } 406 407 log.Info(fmt.Sprintf("NODE: %s, IS HEALTHY: %t", addr, healthy.GotNN && healthy.KnowNN && healthy.Full)) 408 if !healthy.GotNN || !healthy.Full { 409 isHealthy = false 410 break 411 } 412 } 413 if isHealthy { 414 break 415 } 416 } 417 ticker.Stop() 418 419 log.Info("reached healthy kademlia. starting to shutdown nodes.") 420 shutdownStarted := time.Now() 421 // stop all ids, then start them again 422 for _, id := range ids { 423 node := net.GetNode(id) 424 425 if err := net.Stop(node.ID()); err != nil { 426 return fmt.Errorf("error stopping node %s: %s", node.ID().TerminalString(), err) 427 } 428 } 429 log.Info(fmt.Sprintf("shutting down nodes took: %s", time.Since(shutdownStarted))) 430 persistenceEnabled = true 431 discoveryEnabled = false 432 restartTime = time.Now() 433 for _, id := range ids { 434 node := net.GetNode(id) 435 if err := net.Start(node.ID()); err != nil { 436 return fmt.Errorf("error starting node %s: %s", node.ID().TerminalString(), err) 437 } 438 if err := triggerChecks(trigger, net, node.ID()); err != nil { 439 return fmt.Errorf("error triggering checks for node %s: %s", node.ID().TerminalString(), err) 440 } 441 } 442 443 log.Info(fmt.Sprintf("restarting nodes took: %s", time.Since(restartTime))) 444 445 return nil 446 } 447 //connects in a chain 448 wg := sync.WaitGroup{} 449 //connects in a ring 450 for i := range ids { 451 for j := 1; j <= conns; j++ { 452 k := (i + j) % len(ids) 453 if k == i { 454 k = (k + 1) % len(ids) 455 } 456 wg.Add(1) 457 go func(i, k int) { 458 defer wg.Done() 459 net.Connect(ids[i], ids[k]) 460 }(i, k) 461 } 462 } 463 wg.Wait() 464 log.Debug(fmt.Sprintf("nodes: %v", len(addrs))) 465 // construct the peer pot, so that kademlia health can be checked 466 check := func(ctx context.Context, id enode.ID) (bool, error) { 467 select { 468 case <-ctx.Done(): 469 return false, ctx.Err() 470 default: 471 } 472 473 node := net.GetNode(id) 474 if node == nil { 475 return false, fmt.Errorf("unknown node: %s", id) 476 } 477 client, err := node.Client() 478 if err != nil { 479 return false, fmt.Errorf("error getting node client: %s", err) 480 } 481 healthy := &network.Health{} 482 if err := client.Call(&healthy, "hive_healthy", ppmap[id.String()]); err != nil { 483 return false, fmt.Errorf("error getting node health: %s", err) 484 } 485 log.Info(fmt.Sprintf("node %4s healthy: got nearest neighbours: %v, know nearest neighbours: %v, saturated: %v", id, healthy.GotNN, healthy.KnowNN, healthy.Full)) 486 487 return healthy.KnowNN && healthy.GotNN && healthy.Full, nil 488 } 489 490 // 64 nodes ~ 1min 491 // 128 nodes ~ 492 timeout := 300 * time.Second 493 ctx, cancel := context.WithTimeout(context.Background(), timeout) 494 defer cancel() 495 result := simulations.NewSimulation(net).Run(ctx, &simulations.Step{ 496 Action: action, 497 Trigger: trigger, 498 Expect: &simulations.Expectation{ 499 Nodes: ids, 500 Check: check, 501 }, 502 }) 503 if result.Error != nil { 504 return result, nil 505 } 506 507 return result, nil 508 } 509 510 // triggerChecks triggers a simulation step check whenever a peer is added or 511 // removed from the given node, and also every second to avoid a race between 512 // peer events and kademlia becoming healthy 513 func triggerChecks(trigger chan enode.ID, net *simulations.Network, id enode.ID) error { 514 node := net.GetNode(id) 515 if node == nil { 516 return fmt.Errorf("unknown node: %s", id) 517 } 518 client, err := node.Client() 519 if err != nil { 520 return err 521 } 522 events := make(chan *p2p.PeerEvent) 523 sub, err := client.Subscribe(context.Background(), "admin", events, "peerEvents") 524 if err != nil { 525 return fmt.Errorf("error getting peer events for node %v: %s", id, err) 526 } 527 go func() { 528 defer sub.Unsubscribe() 529 530 tick := time.NewTicker(time.Second) 531 defer tick.Stop() 532 533 for { 534 select { 535 case <-events: 536 trigger <- id 537 case <-tick.C: 538 trigger <- id 539 case err := <-sub.Err(): 540 if err != nil { 541 log.Error(fmt.Sprintf("error getting peer events for node %v", id), "err", err) 542 } 543 return 544 } 545 } 546 }() 547 return nil 548 } 549 550 func newService(ctx *adapters.ServiceContext) (node.Service, error) { 551 addr := network.NewAddr(ctx.Config.Node()) 552 553 kp := network.NewKadParams() 554 kp.MinProxBinSize = testMinProxBinSize 555 556 if ctx.Config.Reachable != nil { 557 kp.Reachable = func(o *network.BzzAddr) bool { 558 return ctx.Config.Reachable(o.ID()) 559 } 560 } 561 kad := network.NewKademlia(addr.Over(), kp) 562 hp := network.NewHiveParams() 563 hp.KeepAliveInterval = time.Duration(200) * time.Millisecond 564 hp.Discovery = discoveryEnabled 565 566 log.Info(fmt.Sprintf("discovery for nodeID %s is %t", ctx.Config.ID.String(), hp.Discovery)) 567 568 config := &network.BzzConfig{ 569 OverlayAddr: addr.Over(), 570 UnderlayAddr: addr.Under(), 571 HiveParams: hp, 572 } 573 574 if persistenceEnabled { 575 log.Info(fmt.Sprintf("persistence enabled for nodeID %s", ctx.Config.ID.String())) 576 store, err := getDbStore(ctx.Config.ID.String()) 577 if err != nil { 578 return nil, err 579 } 580 return network.NewBzz(config, kad, store, nil, nil), nil 581 } 582 583 return network.NewBzz(config, kad, nil, nil, nil), nil 584 }