github.com/susy-go/susy-graviton@v0.0.0-20190614130430-36cddae42305/swarm/network/simulations/discovery/discovery_test.go (about) 1 // Copyleft 2018 The susy-graviton Authors 2 // This file is part of the susy-graviton library. 3 // 4 // The susy-graviton library is free software: you can redistribute it and/or modify 5 // it under the terms of the GNU Lesser General Public License as published by 6 // the Free Software Foundation, either version 3 of the License, or 7 // (at your option) any later version. 8 // 9 // The susy-graviton library is distributed in the hope that it will be useful, 10 // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 // MSRCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 // GNU Lesser General Public License for more details. 13 // 14 // You should have received a copy of the GNU Lesser General Public License 15 // along with the susy-graviton library. If not, see <http://www.gnu.org/licenses/>. 16 17 package discovery 18 19 import ( 20 "context" 21 "flag" 22 "fmt" 23 "io/ioutil" 24 "os" 25 "path" 26 "strings" 27 "testing" 28 "time" 29 30 "github.com/susy-go/susy-graviton/common" 31 "github.com/susy-go/susy-graviton/log" 32 "github.com/susy-go/susy-graviton/node" 33 "github.com/susy-go/susy-graviton/p2p" 34 "github.com/susy-go/susy-graviton/p2p/enode" 35 "github.com/susy-go/susy-graviton/p2p/simulations" 36 "github.com/susy-go/susy-graviton/p2p/simulations/adapters" 37 "github.com/susy-go/susy-graviton/swarm/network" 38 "github.com/susy-go/susy-graviton/swarm/state" 39 colorable "github.com/mattn/go-colorable" 40 ) 41 42 // serviceName is used with the exec adapter so the exec'd binary knows which 43 // service to execute 44 const serviceName = "discovery" 45 const testNeighbourhoodSize = 2 46 const discoveryPersistenceDatadir = "discovery_persistence_test_store" 47 48 var discoveryPersistencePath = path.Join(os.TempDir(), discoveryPersistenceDatadir) 49 var discoveryEnabled = true 50 var persistenceEnabled = false 51 52 var services = adapters.Services{ 53 serviceName: newService, 54 } 55 56 func cleanDbStores() error { 57 entries, err := ioutil.ReadDir(os.TempDir()) 58 if err != nil { 59 return err 60 } 61 62 for _, f := range entries { 63 if strings.HasPrefix(f.Name(), discoveryPersistenceDatadir) { 64 os.RemoveAll(path.Join(os.TempDir(), f.Name())) 65 } 66 } 67 return nil 68 69 } 70 71 func getDbStore(nodeID string) (*state.DBStore, error) { 72 if _, err := os.Stat(discoveryPersistencePath + "_" + nodeID); os.IsNotExist(err) { 73 log.Info(fmt.Sprintf("directory for nodeID %s does not exist. creating...", nodeID)) 74 ioutil.TempDir("", discoveryPersistencePath+"_"+nodeID) 75 } 76 log.Info(fmt.Sprintf("opening storage directory for nodeID %s", nodeID)) 77 store, err := state.NewDBStore(discoveryPersistencePath + "_" + nodeID) 78 if err != nil { 79 return nil, err 80 } 81 return store, nil 82 } 83 84 var ( 85 nodeCount = flag.Int("nodes", 32, "number of nodes to create (default 32)") 86 initCount = flag.Int("conns", 1, "number of originally connected peers (default 1)") 87 loglevel = flag.Int("loglevel", 3, "verbosity of logs") 88 rawlog = flag.Bool("rawlog", false, "remove terminal formatting from logs") 89 ) 90 91 func init() { 92 flag.Parse() 93 // register the discovery service which will run as a devp2p 94 // protocol when using the exec adapter 95 adapters.RegisterServices(services) 96 97 log.PrintOrigins(true) 98 log.Root().SetHandler(log.LvlFilterHandler(log.Lvl(*loglevel), log.StreamHandler(colorable.NewColorableStderr(), log.TerminalFormat(!*rawlog)))) 99 } 100 101 // Benchmarks to test the average time it takes for an N-node ring 102 // to full a healthy kademlia topology 103 func BenchmarkDiscovery_8_1(b *testing.B) { benchmarkDiscovery(b, 8, 1) } 104 func BenchmarkDiscovery_16_1(b *testing.B) { benchmarkDiscovery(b, 16, 1) } 105 func BenchmarkDiscovery_32_1(b *testing.B) { benchmarkDiscovery(b, 32, 1) } 106 func BenchmarkDiscovery_64_1(b *testing.B) { benchmarkDiscovery(b, 64, 1) } 107 func BenchmarkDiscovery_128_1(b *testing.B) { benchmarkDiscovery(b, 128, 1) } 108 func BenchmarkDiscovery_256_1(b *testing.B) { benchmarkDiscovery(b, 256, 1) } 109 110 func BenchmarkDiscovery_8_2(b *testing.B) { benchmarkDiscovery(b, 8, 2) } 111 func BenchmarkDiscovery_16_2(b *testing.B) { benchmarkDiscovery(b, 16, 2) } 112 func BenchmarkDiscovery_32_2(b *testing.B) { benchmarkDiscovery(b, 32, 2) } 113 func BenchmarkDiscovery_64_2(b *testing.B) { benchmarkDiscovery(b, 64, 2) } 114 func BenchmarkDiscovery_128_2(b *testing.B) { benchmarkDiscovery(b, 128, 2) } 115 func BenchmarkDiscovery_256_2(b *testing.B) { benchmarkDiscovery(b, 256, 2) } 116 117 func BenchmarkDiscovery_8_4(b *testing.B) { benchmarkDiscovery(b, 8, 4) } 118 func BenchmarkDiscovery_16_4(b *testing.B) { benchmarkDiscovery(b, 16, 4) } 119 func BenchmarkDiscovery_32_4(b *testing.B) { benchmarkDiscovery(b, 32, 4) } 120 func BenchmarkDiscovery_64_4(b *testing.B) { benchmarkDiscovery(b, 64, 4) } 121 func BenchmarkDiscovery_128_4(b *testing.B) { benchmarkDiscovery(b, 128, 4) } 122 func BenchmarkDiscovery_256_4(b *testing.B) { benchmarkDiscovery(b, 256, 4) } 123 124 func TestDiscoverySimulationExecAdapter(t *testing.T) { 125 testDiscoverySimulationExecAdapter(t, *nodeCount, *initCount) 126 } 127 128 func testDiscoverySimulationExecAdapter(t *testing.T, nodes, conns int) { 129 baseDir, err := ioutil.TempDir("", "swarm-test") 130 if err != nil { 131 t.Fatal(err) 132 } 133 defer os.RemoveAll(baseDir) 134 testDiscoverySimulation(t, nodes, conns, adapters.NewExecAdapter(baseDir)) 135 } 136 137 func TestDiscoverySimulationSimAdapter(t *testing.T) { 138 testDiscoverySimulationSimAdapter(t, *nodeCount, *initCount) 139 } 140 141 func TestDiscoveryPersistenceSimulationSimAdapter(t *testing.T) { 142 testDiscoveryPersistenceSimulationSimAdapter(t, *nodeCount, *initCount) 143 } 144 145 func testDiscoveryPersistenceSimulationSimAdapter(t *testing.T, nodes, conns int) { 146 testDiscoveryPersistenceSimulation(t, nodes, conns, adapters.NewSimAdapter(services)) 147 } 148 149 func testDiscoverySimulationSimAdapter(t *testing.T, nodes, conns int) { 150 testDiscoverySimulation(t, nodes, conns, adapters.NewSimAdapter(services)) 151 } 152 153 func testDiscoverySimulation(t *testing.T, nodes, conns int, adapter adapters.NodeAdapter) { 154 startedAt := time.Now() 155 result, err := discoverySimulation(nodes, conns, adapter) 156 if err != nil { 157 t.Fatalf("Setting up simulation failed: %v", err) 158 } 159 if result.Error != nil { 160 t.Fatalf("Simulation failed: %s", result.Error) 161 } 162 t.Logf("Simulation with %d nodes passed in %s", nodes, result.FinishedAt.Sub(result.StartedAt)) 163 var min, max time.Duration 164 var sum int 165 for _, pass := range result.Passes { 166 duration := pass.Sub(result.StartedAt) 167 if sum == 0 || duration < min { 168 min = duration 169 } 170 if duration > max { 171 max = duration 172 } 173 sum += int(duration.Nanoseconds()) 174 } 175 t.Logf("Min: %s, Max: %s, Average: %s", min, max, time.Duration(sum/len(result.Passes))*time.Nanosecond) 176 finishedAt := time.Now() 177 t.Logf("Setup: %s, shutdown: %s", result.StartedAt.Sub(startedAt), finishedAt.Sub(result.FinishedAt)) 178 } 179 180 func testDiscoveryPersistenceSimulation(t *testing.T, nodes, conns int, adapter adapters.NodeAdapter) map[int][]byte { 181 persistenceEnabled = true 182 discoveryEnabled = true 183 184 result, err := discoveryPersistenceSimulation(nodes, conns, adapter) 185 186 if err != nil { 187 t.Fatalf("Setting up simulation failed: %v", err) 188 } 189 if result.Error != nil { 190 t.Fatalf("Simulation failed: %s", result.Error) 191 } 192 t.Logf("Simulation with %d nodes passed in %s", nodes, result.FinishedAt.Sub(result.StartedAt)) 193 // set the discovery and persistence flags again to default so other 194 // tests will not be affected 195 discoveryEnabled = true 196 persistenceEnabled = false 197 return nil 198 } 199 200 func benchmarkDiscovery(b *testing.B, nodes, conns int) { 201 for i := 0; i < b.N; i++ { 202 result, err := discoverySimulation(nodes, conns, adapters.NewSimAdapter(services)) 203 if err != nil { 204 b.Fatalf("setting up simulation failed: %v", err) 205 } 206 if result.Error != nil { 207 b.Logf("simulation failed: %s", result.Error) 208 } 209 } 210 } 211 212 func discoverySimulation(nodes, conns int, adapter adapters.NodeAdapter) (*simulations.StepResult, error) { 213 // create network 214 net := simulations.NewNetwork(adapter, &simulations.NetworkConfig{ 215 ID: "0", 216 DefaultService: serviceName, 217 }) 218 defer net.Shutdown() 219 trigger := make(chan enode.ID) 220 ids := make([]enode.ID, nodes) 221 for i := 0; i < nodes; i++ { 222 conf := adapters.RandomNodeConfig() 223 node, err := net.NewNodeWithConfig(conf) 224 if err != nil { 225 return nil, fmt.Errorf("error starting node: %s", err) 226 } 227 if err := net.Start(node.ID()); err != nil { 228 return nil, fmt.Errorf("error starting node %s: %s", node.ID().TerminalString(), err) 229 } 230 if err := triggerChecks(trigger, net, node.ID()); err != nil { 231 return nil, fmt.Errorf("error triggering checks for node %s: %s", node.ID().TerminalString(), err) 232 } 233 ids[i] = node.ID() 234 } 235 236 // run a simulation which connects the 10 nodes in a ring and waits 237 // for full peer discovery 238 var addrs [][]byte 239 action := func(ctx context.Context) error { 240 return nil 241 } 242 for i := range ids { 243 // collect the overlay addresses, to 244 addrs = append(addrs, ids[i].Bytes()) 245 } 246 err := net.ConnectNodesChain(nil) 247 if err != nil { 248 return nil, err 249 } 250 log.Debug(fmt.Sprintf("nodes: %v", len(addrs))) 251 // construct the peer pot, so that kademlia health can be checked 252 ppmap := network.NewPeerPotMap(network.NewKadParams().NeighbourhoodSize, addrs) 253 check := func(ctx context.Context, id enode.ID) (bool, error) { 254 select { 255 case <-ctx.Done(): 256 return false, ctx.Err() 257 default: 258 } 259 260 node := net.GetNode(id) 261 if node == nil { 262 return false, fmt.Errorf("unknown node: %s", id) 263 } 264 client, err := node.Client() 265 if err != nil { 266 return false, fmt.Errorf("error getting node client: %s", err) 267 } 268 269 healthy := &network.Health{} 270 if err := client.Call(&healthy, "hive_getHealthInfo", ppmap[common.Bytes2Hex(id.Bytes())]); err != nil { 271 return false, fmt.Errorf("error getting node health: %s", err) 272 } 273 log.Debug(fmt.Sprintf("node %4s healthy: connected nearest neighbours: %v, know nearest neighbours: %v,\n\n%v", id, healthy.ConnectNN, healthy.KnowNN, healthy.Hive)) 274 return healthy.KnowNN && healthy.ConnectNN, nil 275 } 276 277 // 64 nodes ~ 1min 278 // 128 nodes ~ 279 timeout := 300 * time.Second 280 ctx, cancel := context.WithTimeout(context.Background(), timeout) 281 defer cancel() 282 result := simulations.NewSimulation(net).Run(ctx, &simulations.Step{ 283 Action: action, 284 Trigger: trigger, 285 Expect: &simulations.Expectation{ 286 Nodes: ids, 287 Check: check, 288 }, 289 }) 290 if result.Error != nil { 291 return result, nil 292 } 293 return result, nil 294 } 295 296 func discoveryPersistenceSimulation(nodes, conns int, adapter adapters.NodeAdapter) (*simulations.StepResult, error) { 297 cleanDbStores() 298 defer cleanDbStores() 299 300 // create network 301 net := simulations.NewNetwork(adapter, &simulations.NetworkConfig{ 302 ID: "0", 303 DefaultService: serviceName, 304 }) 305 defer net.Shutdown() 306 trigger := make(chan enode.ID) 307 ids := make([]enode.ID, nodes) 308 var addrs [][]byte 309 310 for i := 0; i < nodes; i++ { 311 conf := adapters.RandomNodeConfig() 312 node, err := net.NewNodeWithConfig(conf) 313 if err != nil { 314 panic(err) 315 } 316 if err != nil { 317 return nil, fmt.Errorf("error starting node: %s", err) 318 } 319 if err := net.Start(node.ID()); err != nil { 320 return nil, fmt.Errorf("error starting node %s: %s", node.ID().TerminalString(), err) 321 } 322 if err := triggerChecks(trigger, net, node.ID()); err != nil { 323 return nil, fmt.Errorf("error triggering checks for node %s: %s", node.ID().TerminalString(), err) 324 } 325 // TODO we shouldn't be equating underaddr and overaddr like this, as they are not the same in production 326 ids[i] = node.ID() 327 a := ids[i].Bytes() 328 329 addrs = append(addrs, a) 330 } 331 332 // run a simulation which connects the 10 nodes in a ring and waits 333 // for full peer discovery 334 335 var restartTime time.Time 336 337 action := func(ctx context.Context) error { 338 ticker := time.NewTicker(500 * time.Millisecond) 339 340 for range ticker.C { 341 isHealthy := true 342 for _, id := range ids { 343 //call Healthy RPC 344 node := net.GetNode(id) 345 if node == nil { 346 return fmt.Errorf("unknown node: %s", id) 347 } 348 client, err := node.Client() 349 if err != nil { 350 return fmt.Errorf("error getting node client: %s", err) 351 } 352 healthy := &network.Health{} 353 addr := id.String() 354 ppmap := network.NewPeerPotMap(network.NewKadParams().NeighbourhoodSize, addrs) 355 if err := client.Call(&healthy, "hive_getHealthInfo", ppmap[common.Bytes2Hex(id.Bytes())]); err != nil { 356 return fmt.Errorf("error getting node health: %s", err) 357 } 358 359 log.Info(fmt.Sprintf("NODE: %s, IS HEALTHY: %t", addr, healthy.ConnectNN && healthy.KnowNN && healthy.CountKnowNN > 0)) 360 var nodeStr string 361 if err := client.Call(&nodeStr, "hive_string"); err != nil { 362 return fmt.Errorf("error getting node string %s", err) 363 } 364 log.Info(nodeStr) 365 if !healthy.ConnectNN || healthy.CountKnowNN == 0 { 366 isHealthy = false 367 break 368 } 369 } 370 if isHealthy { 371 break 372 } 373 } 374 ticker.Stop() 375 376 log.Info("reached healthy kademlia. starting to shutdown nodes.") 377 shutdownStarted := time.Now() 378 // stop all ids, then start them again 379 for _, id := range ids { 380 node := net.GetNode(id) 381 382 if err := net.Stop(node.ID()); err != nil { 383 return fmt.Errorf("error stopping node %s: %s", node.ID().TerminalString(), err) 384 } 385 } 386 log.Info(fmt.Sprintf("shutting down nodes took: %s", time.Since(shutdownStarted))) 387 persistenceEnabled = true 388 discoveryEnabled = false 389 restartTime = time.Now() 390 for _, id := range ids { 391 node := net.GetNode(id) 392 if err := net.Start(node.ID()); err != nil { 393 return fmt.Errorf("error starting node %s: %s", node.ID().TerminalString(), err) 394 } 395 if err := triggerChecks(trigger, net, node.ID()); err != nil { 396 return fmt.Errorf("error triggering checks for node %s: %s", node.ID().TerminalString(), err) 397 } 398 } 399 400 log.Info(fmt.Sprintf("restarting nodes took: %s", time.Since(restartTime))) 401 402 return nil 403 } 404 net.ConnectNodesChain(nil) 405 log.Debug(fmt.Sprintf("nodes: %v", len(addrs))) 406 // construct the peer pot, so that kademlia health can be checked 407 check := func(ctx context.Context, id enode.ID) (bool, error) { 408 select { 409 case <-ctx.Done(): 410 return false, ctx.Err() 411 default: 412 } 413 414 node := net.GetNode(id) 415 if node == nil { 416 return false, fmt.Errorf("unknown node: %s", id) 417 } 418 client, err := node.Client() 419 if err != nil { 420 return false, fmt.Errorf("error getting node client: %s", err) 421 } 422 healthy := &network.Health{} 423 ppmap := network.NewPeerPotMap(network.NewKadParams().NeighbourhoodSize, addrs) 424 425 if err := client.Call(&healthy, "hive_getHealthInfo", ppmap[common.Bytes2Hex(id.Bytes())]); err != nil { 426 return false, fmt.Errorf("error getting node health: %s", err) 427 } 428 log.Info(fmt.Sprintf("node %4s healthy: got nearest neighbours: %v, know nearest neighbours: %v", id, healthy.ConnectNN, healthy.KnowNN)) 429 430 return healthy.KnowNN && healthy.ConnectNN, nil 431 } 432 433 // 64 nodes ~ 1min 434 // 128 nodes ~ 435 timeout := 300 * time.Second 436 ctx, cancel := context.WithTimeout(context.Background(), timeout) 437 defer cancel() 438 result := simulations.NewSimulation(net).Run(ctx, &simulations.Step{ 439 Action: action, 440 Trigger: trigger, 441 Expect: &simulations.Expectation{ 442 Nodes: ids, 443 Check: check, 444 }, 445 }) 446 if result.Error != nil { 447 return result, nil 448 } 449 450 return result, nil 451 } 452 453 // triggerChecks triggers a simulation step check whenever a peer is added or 454 // removed from the given node, and also every second to avoid a race between 455 // peer events and kademlia becoming healthy 456 func triggerChecks(trigger chan enode.ID, net *simulations.Network, id enode.ID) error { 457 node := net.GetNode(id) 458 if node == nil { 459 return fmt.Errorf("unknown node: %s", id) 460 } 461 client, err := node.Client() 462 if err != nil { 463 return err 464 } 465 events := make(chan *p2p.PeerEvent) 466 sub, err := client.Subscribe(context.Background(), "admin", events, "peerEvents") 467 if err != nil { 468 return fmt.Errorf("error getting peer events for node %v: %s", id, err) 469 } 470 go func() { 471 defer sub.Unsubscribe() 472 473 tick := time.NewTicker(time.Second) 474 defer tick.Stop() 475 476 for { 477 select { 478 case <-events: 479 trigger <- id 480 case <-tick.C: 481 trigger <- id 482 case err := <-sub.Err(): 483 if err != nil { 484 log.Error(fmt.Sprintf("error getting peer events for node %v", id), "err", err) 485 } 486 return 487 } 488 } 489 }() 490 return nil 491 } 492 493 func newService(ctx *adapters.ServiceContext) (node.Service, error) { 494 addr := network.NewAddr(ctx.Config.Node()) 495 496 kp := network.NewKadParams() 497 kp.NeighbourhoodSize = testNeighbourhoodSize 498 499 if ctx.Config.Reachable != nil { 500 kp.Reachable = func(o *network.BzzAddr) bool { 501 return ctx.Config.Reachable(o.ID()) 502 } 503 } 504 kad := network.NewKademlia(addr.Over(), kp) 505 hp := network.NewHiveParams() 506 hp.KeepAliveInterval = time.Duration(200) * time.Millisecond 507 hp.Discovery = discoveryEnabled 508 509 log.Info(fmt.Sprintf("discovery for nodeID %s is %t", ctx.Config.ID.String(), hp.Discovery)) 510 511 config := &network.BzzConfig{ 512 OverlayAddr: addr.Over(), 513 UnderlayAddr: addr.Under(), 514 HiveParams: hp, 515 } 516 517 if persistenceEnabled { 518 log.Info(fmt.Sprintf("persistence enabled for nodeID %s", ctx.Config.ID.String())) 519 store, err := getDbStore(ctx.Config.ID.String()) 520 if err != nil { 521 return nil, err 522 } 523 return network.NewBzz(config, kad, store, nil, nil), nil 524 } 525 526 return network.NewBzz(config, kad, nil, nil, nil), nil 527 }