github.com/hechain20/hechain@v0.0.0-20220316014945-b544036ba106/orderer/common/cluster/deliver_test.go

/*
Copyright hechain. 2018 All Rights Reserved.

SPDX-License-Identifier: Apache-2.0
*/

package cluster_test

import (
	"context"
	"crypto/rand"
	"encoding/hex"
	"fmt"
	"net"
	"strconv"
	"strings"
	"sync"
	"sync/atomic"
	"testing"
	"time"

	"github.com/golang/protobuf/proto"
	"github.com/hechain20/hechain/bccsp/factory"
	"github.com/hechain20/hechain/common/flogging"
	"github.com/hechain20/hechain/internal/pkg/comm"
	"github.com/hechain20/hechain/internal/pkg/identity"
	"github.com/hechain20/hechain/orderer/common/cluster"
	"github.com/hechain20/hechain/orderer/common/cluster/mocks"
	"github.com/hechain20/hechain/protoutil"
	"github.com/hyperledger/fabric-protos-go/common"
	"github.com/hyperledger/fabric-protos-go/orderer"
	"github.com/onsi/gomega"
	"github.com/pkg/errors"
	"github.com/stretchr/testify/require"
	"go.uber.org/zap"
	"go.uber.org/zap/zapcore"
	"google.golang.org/grpc"
	"google.golang.org/grpc/balancer"
	"google.golang.org/grpc/balancer/roundrobin"
)

// protects gRPC balancer registration
var gRPCBalancerLock = sync.Mutex{}

func init() {
	factory.InitFactories(nil)
}

//go:generate counterfeiter -o mocks/signer_serializer.go --fake-name SignerSerializer . signerSerializer

type signerSerializer interface {
	identity.SignerSerializer
}

type wrappedBalancer struct {
	balancer.Balancer
	cd *countingDialer
}

func (wb *wrappedBalancer) Close() {
	defer atomic.AddUint32(&wb.cd.connectionCount, ^uint32(0))
	wb.Balancer.Close()
}

type countingDialer struct {
	name            string
	baseBuilder     balancer.Builder
	connectionCount uint32
}

func newCountingDialer() *countingDialer {
	gRPCBalancerLock.Lock()
	builder := balancer.Get(roundrobin.Name)
	gRPCBalancerLock.Unlock()

	buff := make([]byte, 16)
	rand.Read(buff)
	cb := &countingDialer{
		name:        hex.EncodeToString(buff),
		baseBuilder: builder,
	}

	gRPCBalancerLock.Lock()
	balancer.Register(cb)
	gRPCBalancerLock.Unlock()

	return cb
}

func (d *countingDialer) Build(cc balancer.ClientConn, opts balancer.BuildOptions) balancer.Balancer {
	defer atomic.AddUint32(&d.connectionCount, 1)
	lb := d.baseBuilder.Build(cc, opts)
	return &wrappedBalancer{
		Balancer: lb,
		cd:       d,
	}
}

func (d *countingDialer) Name() string {
	return d.name
}

func (d *countingDialer) assertAllConnectionsClosed(t *testing.T) {
	timeLimit := time.Now().Add(timeout)
	for atomic.LoadUint32(&d.connectionCount) != uint32(0) && time.Now().Before(timeLimit) {
		time.Sleep(time.Millisecond)
	}
	require.Equal(t, uint32(0), atomic.LoadUint32(&d.connectionCount))
}

func (d *countingDialer) Dial(address cluster.EndpointCriteria) (*grpc.ClientConn, error) {
	ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*100)
	defer cancel()

	gRPCBalancerLock.Lock()
	balancer := grpc.WithBalancerName(d.name)
	gRPCBalancerLock.Unlock()
	return grpc.DialContext(ctx, address.Endpoint, grpc.WithBlock(), grpc.WithInsecure(), balancer)
}

func noopBlockVerifierf(_ []*common.Block, _ string) error {
	return nil
}

func readSeekEnvelope(stream orderer.AtomicBroadcast_DeliverServer) (*orderer.SeekInfo, string, error) {
	env, err := stream.Recv()
	if err != nil {
		return nil, "", err
	}
	payload, err := protoutil.UnmarshalPayload(env.Payload)
	if err != nil {
		return nil, "", err
	}
	seekInfo := &orderer.SeekInfo{}
	if err = proto.Unmarshal(payload.Data, seekInfo); err != nil {
		return nil, "", err
	}
	chdr := &common.ChannelHeader{}
	if err = proto.Unmarshal(payload.Header.ChannelHeader, chdr); err != nil {
		return nil, "", err
	}
	return seekInfo, chdr.ChannelId, nil
}

type deliverServer struct {
	logger *flogging.FabricLogger

	t *testing.T
	sync.Mutex
	err            error
	srv            *comm.GRPCServer
	seekAssertions chan func(*orderer.SeekInfo, string)
	blockResponses chan *orderer.DeliverResponse
	done           chan struct{}
}

func (ds *deliverServer) endpointCriteria() cluster.EndpointCriteria {
	return cluster.EndpointCriteria{Endpoint: ds.srv.Address()}
}

func (ds *deliverServer) isFaulty() bool {
	ds.Lock()
	defer ds.Unlock()
	return ds.err != nil
}

func (*deliverServer) Broadcast(orderer.AtomicBroadcast_BroadcastServer) error {
	panic("implement me")
}

func (ds *deliverServer) Deliver(stream orderer.AtomicBroadcast_DeliverServer) error {
	ds.Lock()
	err := ds.err
	ds.Unlock()

	if err != nil {
		return err
	}
	seekInfo, channel, err := readSeekEnvelope(stream)
	require.NoError(ds.t, err)

	// FAB-16233 This is meant to mitigate timeouts when
	// seekAssertions does not receive a value
	timer := time.NewTimer(1 * time.Minute)
	defer timer.Stop()

	select {
	case <-timer.C:
		ds.t.Fatalf("timed out waiting for seek assertions to receive a value\n")
	// Get the next seek assertion and ensure the next seek is of the expected type
	case seekAssert := <-ds.seekAssertions:
		ds.logger.Debugf("Received seekInfo: %+v", seekInfo)
		seekAssert(seekInfo, channel)
	case <-ds.done:
		return nil
	}

	if seekInfo.GetStart().GetSpecified() != nil {
		return ds.deliverBlocks(stream)
	}
	if seekInfo.GetStart().GetNewest() != nil {
		select {
		case resp := <-ds.blocks():
			if resp == nil {
				return nil
			}
			return stream.Send(resp)
		case <-ds.done:
		}
	}
	ds.t.Fatalf("expected either specified or newest seek but got %v\n", seekInfo.GetStart())
	return nil // unreachable
}

func (ds *deliverServer) deliverBlocks(stream orderer.AtomicBroadcast_DeliverServer) error {
	for {
		blockChan := ds.blocks()
		var response *orderer.DeliverResponse
		select {
		case response = <-blockChan:
		case <-ds.done:
			return nil
		}

		// A nil response is a signal from the test to close the stream.
		// This is needed to avoid reading from the block buffer, hence
		// consuming by accident a block that is tabled to be pulled
		// later in the test.
		if response == nil {
			return nil
		}
		if err := stream.Send(response); err != nil {
			return err
		}
	}
}

func (ds *deliverServer) blocks() chan *orderer.DeliverResponse {
	ds.Lock()
	defer ds.Unlock()
	blockChan := ds.blockResponses
	return blockChan
}

func (ds *deliverServer) setBlocks(blocks chan *orderer.DeliverResponse) {
	ds.Lock()
	defer ds.Unlock()
	ds.blockResponses = blocks
}

func (ds *deliverServer) port() int {
	_, portStr, err := net.SplitHostPort(ds.srv.Address())
	require.NoError(ds.t, err)

	port, err := strconv.ParseInt(portStr, 10, 32)
	require.NoError(ds.t, err)
	return int(port)
}

func (ds *deliverServer) resurrect() {
	var err error
	// copy the responses channel into a fresh one
	respChan := make(chan *orderer.DeliverResponse, 100)
	for resp := range ds.blocks() {
		respChan <- resp
	}
	ds.blockResponses = respChan
	ds.done = make(chan struct{})
	ds.srv.Stop()
	// And re-create the gRPC server on that port
	ds.srv, err = comm.NewGRPCServer(fmt.Sprintf("127.0.0.1:%d", ds.port()), comm.ServerConfig{})
	require.NoError(ds.t, err)
	orderer.RegisterAtomicBroadcastServer(ds.srv.Server(), ds)
	go ds.srv.Start()
}

func (ds *deliverServer) stop() {
	ds.srv.Stop()
	close(ds.blocks())
	close(ds.done)
}

func (ds *deliverServer) enqueueResponse(seq uint64) {
	select {
	case ds.blocks() <- &orderer.DeliverResponse{
		Type: &orderer.DeliverResponse_Block{Block: protoutil.NewBlock(seq, nil)},
	}:
	case <-ds.done:
	}
}

func (ds *deliverServer) addExpectProbeAssert() {
	select {
	case ds.seekAssertions <- func(info *orderer.SeekInfo, _ string) {
		require.NotNil(ds.t, info.GetStart().GetNewest())
		require.Equal(ds.t, info.ErrorResponse, orderer.SeekInfo_BEST_EFFORT)
	}:
	case <-ds.done:
	}
}

func (ds *deliverServer) addExpectPullAssert(seq uint64) {
	select {
	case ds.seekAssertions <- func(info *orderer.SeekInfo, _ string) {
		seekPosition := info.GetStart()
		require.NotNil(ds.t, seekPosition)
		seekSpecified := seekPosition.GetSpecified()
		require.NotNil(ds.t, seekSpecified)
		require.Equal(ds.t, seq, seekSpecified.Number)
		require.Equal(ds.t, info.ErrorResponse, orderer.SeekInfo_BEST_EFFORT)
	}:
	case <-ds.done:
	}
}

func newClusterNode(t *testing.T) *deliverServer {
	srv, err := comm.NewGRPCServer("127.0.0.1:0", comm.ServerConfig{})
	require.NoError(t, err)
	ds := &deliverServer{
		logger:         flogging.MustGetLogger("test.debug"),
		t:              t,
		seekAssertions: make(chan func(*orderer.SeekInfo, string), 100),
		blockResponses: make(chan *orderer.DeliverResponse, 100),
		done:           make(chan struct{}),
		srv:            srv,
	}
	orderer.RegisterAtomicBroadcastServer(srv.Server(), ds)
	go srv.Start()
	return ds
}

func newBlockPuller(dialer *countingDialer, orderers ...string) *cluster.BlockPuller {
	return &cluster.BlockPuller{
		Dialer:              dialer,
		Channel:             "mychannel",
		Signer:              &mocks.SignerSerializer{},
		Endpoints:           endpointCriteriaFromEndpoints(orderers...),
		FetchTimeout:        time.Second * 10,
		MaxTotalBufferBytes: 1024 * 1024, // 1MB
		RetryTimeout:        time.Millisecond * 10,
		VerifyBlockSequence: noopBlockVerifierf,
		Logger:              flogging.MustGetLogger("test"),
		StopChannel:         make(chan struct{}),
	}
}

func endpointCriteriaFromEndpoints(orderers ...string) []cluster.EndpointCriteria {
	var res []cluster.EndpointCriteria
	for _, orderer := range orderers {
		res = append(res, cluster.EndpointCriteria{Endpoint: orderer})
	}
	return res
}

func TestBlockPullerBasicHappyPath(t *testing.T) {
	// Scenario: Single ordering node,
	// and the block puller pulls blocks 5 to 10.
	osn := newClusterNode(t)
	defer osn.stop()

	dialer := newCountingDialer()
	bp := newBlockPuller(dialer, osn.srv.Address())

	// The first seek request asks for the latest block
	osn.addExpectProbeAssert()
	// The first response says that the height is 10
	osn.enqueueResponse(10)
	// The next seek request is for block 5
	osn.addExpectPullAssert(5)
	// The later responses are the blocks themselves
	for i := 5; i <= 10; i++ {
		osn.enqueueResponse(uint64(i))
	}

	for i := 5; i <= 10; i++ {
		require.Equal(t, uint64(i), bp.PullBlock(uint64(i)).Header.Number)
	}
	require.Len(t, osn.blockResponses, 0)

	bp.Close()
	dialer.assertAllConnectionsClosed(t)
}

func TestBlockPullerDuplicate(t *testing.T) {
	// Scenario: The address of the ordering node
	// is found twice in the configuration, but this
	// doesn't create a problem.
	osn := newClusterNode(t)
	defer osn.stop()

	dialer := newCountingDialer()
	// Add the address twice
	bp := newBlockPuller(dialer, osn.srv.Address(), osn.srv.Address())

	// The first seek request asks for the latest block (twice)
	osn.addExpectProbeAssert()
	osn.addExpectProbeAssert()
	// The first response says that the height is 3
	osn.enqueueResponse(3)
	osn.enqueueResponse(3)
	// The next seek request is for block 1, only from some OSN
	osn.addExpectPullAssert(1)
	// The later responses are the blocks themselves
	for i := 1; i <= 3; i++ {
		osn.enqueueResponse(uint64(i))
	}

	for i := 1; i <= 3; i++ {
		require.Equal(t, uint64(i), bp.PullBlock(uint64(i)).Header.Number)
	}
	require.Len(t, osn.blockResponses, 0)

	bp.Close()
	dialer.assertAllConnectionsClosed(t)
}

func TestBlockPullerHeavyBlocks(t *testing.T) {
	// Scenario: Single ordering node,
	// and the block puller pulls 50 blocks, each
	// weighing 1K, but the buffer can only contain
	// 10K, so it should pull the 50 blocks in chunks of 10,
	// and verify 5 sequences at a time.

	osn := newClusterNode(t)
	defer osn.stop()
	osn.addExpectProbeAssert()
	osn.addExpectPullAssert(1)
	osn.enqueueResponse(100) // The last sequence is 100

	enqueueBlockBatch := func(start, end uint64) {
		for seq := start; seq <= end; seq++ {
			resp := &orderer.DeliverResponse{
				Type: &orderer.DeliverResponse_Block{
					Block: protoutil.NewBlock(seq, nil),
				},
			}
			data := resp.GetBlock().Data.Data
			resp.GetBlock().Data.Data = append(data, make([]byte, 1024))
			osn.blockResponses <- resp
		}
	}

	dialer := newCountingDialer()
	bp := newBlockPuller(dialer, osn.srv.Address())
	var gotBlockMessageCount int
	bp.Logger = bp.Logger.WithOptions(zap.Hooks(func(entry zapcore.Entry) error {
		if strings.Contains(entry.Message, "Got block") {
			gotBlockMessageCount++
		}
		return nil
	}))
	bp.MaxTotalBufferBytes = 1024 * 10 // 10K

	// Enqueue only the next batch in the orderer node.
	// This ensures that only 10 blocks are fetched into the buffer
	// and not more.
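	// Each iteration below enqueues a batch of 10 blocks (1KB each) and then pulls
	// them, so the 10KB buffer holds at most one batch at a time.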
	for i := uint64(0); i < 5; i++ {
		enqueueBlockBatch(i*10+uint64(1), i*10+uint64(10))
		for seq := i*10 + uint64(1); seq <= i*10+uint64(10); seq++ {
			require.Equal(t, seq, bp.PullBlock(seq).Header.Number)
		}
	}

	require.Equal(t, 50, gotBlockMessageCount)
	bp.Close()
	dialer.assertAllConnectionsClosed(t)
}

func TestBlockPullerClone(t *testing.T) {
	// Scenario: We have a block puller that is connected
	// to an ordering node, and we clone it.
	// We expect that the new block puller is a clean slate
	// and doesn't share any internal state with its origin.
	osn1 := newClusterNode(t)
	defer osn1.stop()

	osn1.addExpectProbeAssert()
	osn1.addExpectPullAssert(1)
	// last block sequence is 100
	osn1.enqueueResponse(100)
	osn1.enqueueResponse(1)
	// The block puller is expected to disconnect after pulling
	// a single block. So signal the server-side to avoid
	// grabbing the next block after block 1 is pulled.
	osn1.blockResponses <- nil

	dialer := newCountingDialer()
	bp := newBlockPuller(dialer, osn1.srv.Address())
	bp.FetchTimeout = time.Millisecond * 100
	// Pull a block at a time and don't buffer them
	bp.MaxTotalBufferBytes = 1
	// Clone the block puller
	bpClone := bp.Clone()
	// and override its channel
	bpClone.Channel = "foo"
	// Ensure the channel change doesn't reflect in the original puller
	require.Equal(t, "mychannel", bp.Channel)

	block := bp.PullBlock(1)
	require.Equal(t, uint64(1), block.Header.Number)

	// After the original block puller is closed, the
	// clone should not be affected
	bp.Close()
	dialer.assertAllConnectionsClosed(t)

	// The clone block puller should not have cached the internal state
	// from its origin block puller, thus it should probe again for the last block
	// sequence as if it is a new block puller.
	osn1.addExpectProbeAssert()
	osn1.addExpectPullAssert(2)
	osn1.enqueueResponse(100)
	osn1.enqueueResponse(2)

	block = bpClone.PullBlock(2)
	require.Equal(t, uint64(2), block.Header.Number)

	bpClone.Close()
	dialer.assertAllConnectionsClosed(t)
}

func TestBlockPullerHeightsByEndpoints(t *testing.T) {
	// Scenario: We ask for the latest block from all the known ordering nodes.
	// One ordering node is unavailable (offline).
	// One ordering node doesn't have blocks for that channel.
	// The remaining node returns the latest block.
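	// Note: HeightsByEndpoints reports ledger heights, i.e. last block sequence + 1,
	// hence the expected value of 6 below for a last block of 5.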
	osn1 := newClusterNode(t)

	osn2 := newClusterNode(t)
	defer osn2.stop()

	osn3 := newClusterNode(t)
	defer osn3.stop()

	dialer := newCountingDialer()
	bp := newBlockPuller(dialer, osn1.srv.Address(), osn2.srv.Address(), osn3.srv.Address())

	// The first seek request asks for the latest block from some ordering node
	osn1.addExpectProbeAssert()
	osn2.addExpectProbeAssert()
	osn3.addExpectProbeAssert()

	// The first ordering node is offline
	osn1.stop()
	// The second returns a bad answer
	osn2.blockResponses <- &orderer.DeliverResponse{
		Type: &orderer.DeliverResponse_Status{Status: common.Status_FORBIDDEN},
	}
	// The third returns the latest block
	osn3.enqueueResponse(5)

	res, err := bp.HeightsByEndpoints()
	require.NoError(t, err)
	expected := map[string]uint64{
		osn3.srv.Address(): 6,
	}
	require.Equal(t, expected, res)

	bp.Close()
	dialer.assertAllConnectionsClosed(t)
}

func TestBlockPullerMultipleOrderers(t *testing.T) {
	// Scenario: 3 ordering nodes,
	// and the block puller pulls blocks 3 to 5 from some
	// orderer node.
	// We ensure that exactly one orderer node sent blocks.

	osn1 := newClusterNode(t)
	defer osn1.stop()

	osn2 := newClusterNode(t)
	defer osn2.stop()

	osn3 := newClusterNode(t)
	defer osn3.stop()

	dialer := newCountingDialer()
	bp := newBlockPuller(dialer, osn1.srv.Address(), osn2.srv.Address(), osn3.srv.Address())

	// The first seek request asks for the latest block from some ordering node
	osn1.addExpectProbeAssert()
	osn2.addExpectProbeAssert()
	osn3.addExpectProbeAssert()
	// The first response says that the height is 5
	osn1.enqueueResponse(5)
	osn2.enqueueResponse(5)
	osn3.enqueueResponse(5)

	// The next seek request is for block 3
	osn1.addExpectPullAssert(3)
	osn2.addExpectPullAssert(3)
	osn3.addExpectPullAssert(3)

	// The later responses are the blocks themselves
	for i := 3; i <= 5; i++ {
		osn1.enqueueResponse(uint64(i))
		osn2.enqueueResponse(uint64(i))
		osn3.enqueueResponse(uint64(i))
	}

	initialTotalBlockAmount := len(osn1.blockResponses) + len(osn2.blockResponses) + len(osn3.blockResponses)

	for i := 3; i <= 5; i++ {
		require.Equal(t, uint64(i), bp.PullBlock(uint64(i)).Header.Number)
	}

	// Assert the cumulative amount of blocks in the OSNs went down by 6:
	// blocks 3, 4, 5 were pulled - that's 3 blocks.
	// Block 5 was also sent by each of the 3 nodes at the probe phase - that's 3 more.
	finalTotalBlockAmount := len(osn1.blockResponses) + len(osn2.blockResponses) + len(osn3.blockResponses)
	require.Equal(t, initialTotalBlockAmount-6, finalTotalBlockAmount)

	bp.Close()
	dialer.assertAllConnectionsClosed(t)
}

func TestBlockPullerFailover(t *testing.T) {
	// Scenario:
	// The block puller is expected to pull blocks 1 to 3.
	// There are two ordering nodes, but at first - only node 1 is available.
	// The block puller first connects to it, but as soon as it pulls the first block,
	// node 1 crashes.
	// A second orderer is then spawned and the block puller is expected to
	// connect to it and pull the rest of the blocks.
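	// osn1 is pre-loaded only with the probe response and block 1, while osn2 is
	// pre-loaded with a probe response and blocks 1 to 3, so the puller can resume
	// from osn2 once osn1 goes down.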

	osn1 := newClusterNode(t)
	osn1.addExpectProbeAssert()
	osn1.addExpectPullAssert(1)
	osn1.enqueueResponse(3)
	osn1.enqueueResponse(1)

	osn2 := newClusterNode(t)
	defer osn2.stop()

	osn2.addExpectProbeAssert()
	osn2.addExpectPullAssert(1)
	// First response is for the probe
	osn2.enqueueResponse(3)
	// Next three responses are for the pulling.
	osn2.enqueueResponse(1)
	osn2.enqueueResponse(2)
	osn2.enqueueResponse(3)

	// First we stop node 2 to make sure the block puller can't connect to it when it is created
	osn2.stop()

	dialer := newCountingDialer()
	bp := newBlockPuller(dialer, osn1.srv.Address(), osn2.srv.Address())

	// We don't want to rely on fetch timeout, but on pure failover logic,
	// so make the fetch timeout huge
	bp.FetchTimeout = time.Hour

	// Configure the block puller logger to close the pulledBlock1Done channel
	// once the block puller receives the first block.
	pulledBlock1Done := make(chan struct{})
	var once sync.Once
	bp.Logger = bp.Logger.WithOptions(zap.Hooks(func(entry zapcore.Entry) error {
		if strings.Contains(entry.Message, "Got block [1] of size") {
			once.Do(func() { close(pulledBlock1Done) })
		}
		return nil
	}))

	go func() {
		// Wait for the block puller to pull the first block
		select {
		case <-pulledBlock1Done:
		case <-time.After(10 * time.Second):
			t.Errorf("message was not logged before timeout")
			return
		}
		// And now crash node 1 and resurrect node 2
		osn1.stop()
		osn2.resurrect()
	}()

	// Assert reception of blocks 1 to 3
	require.Equal(t, uint64(1), bp.PullBlock(uint64(1)).Header.Number)
	require.Equal(t, uint64(2), bp.PullBlock(uint64(2)).Header.Number)
	require.Equal(t, uint64(3), bp.PullBlock(uint64(3)).Header.Number)

	bp.Close()
	dialer.assertAllConnectionsClosed(t)
}

func TestBlockPullerNoneResponsiveOrderer(t *testing.T) {
	// Scenario: There are two ordering nodes, and the block puller
	// connects to one of them.
	// It fetches the first block, but then the second block isn't sent
	// for too long, and the block puller should abort and try the other
	// node.

	osn1 := newClusterNode(t)
	defer osn1.stop()

	osn2 := newClusterNode(t)
	defer osn2.stop()

	osn1.addExpectProbeAssert()
	osn2.addExpectProbeAssert()
	osn1.enqueueResponse(3)
	osn2.enqueueResponse(3)

	dialer := newCountingDialer()
	bp := newBlockPuller(dialer, osn1.srv.Address(), osn2.srv.Address())
	bp.FetchTimeout = time.Millisecond * 100

	notInUseOrdererNode := osn2
	// Configure the logger to tell us which ordering node the block puller isn't
	// connected to.
	// This is done by intercepting the appropriate message.
	waitForConnectionDone := make(chan struct{})
	var once sync.Once
	bp.Logger = bp.Logger.WithOptions(zap.Hooks(func(entry zapcore.Entry) error {
		if !strings.Contains(entry.Message, "Sending request for block [1]") {
			return nil
		}
		defer once.Do(func() { close(waitForConnectionDone) })
		s := entry.Message[len("Sending request for block [1] to 127.0.0.1:"):]
		port, err := strconv.ParseInt(s, 10, 32)
		require.NoError(t, err)
		// If osn2 is the current orderer we're connected to,
		// the orderer we're not connected to is osn1
		if osn2.port() == int(port) {
			notInUseOrdererNode = osn1
			// Enqueue block 1 to the current orderer the block puller
			// is connected to
			osn2.enqueueResponse(1)
			osn2.addExpectPullAssert(1)
		} else {
			// We're connected to osn1, so enqueue block 1
			osn1.enqueueResponse(1)
			osn1.addExpectPullAssert(1)
		}
		return nil
	}))

	go func() {
		select {
		case <-waitForConnectionDone:
		case <-time.After(10 * time.Second):
			t.Errorf("message was not logged before timeout")
			return
		}

		// Enqueue the height in the orderer we're not connected to
		notInUseOrdererNode.enqueueResponse(3)
		notInUseOrdererNode.addExpectProbeAssert()
		// Enqueue blocks 1, 2, 3 to the orderer node we're not connected to.
		notInUseOrdererNode.addExpectPullAssert(1)
		notInUseOrdererNode.enqueueResponse(1)
		notInUseOrdererNode.enqueueResponse(2)
		notInUseOrdererNode.enqueueResponse(3)
	}()

	// Assert reception of blocks 1 to 3
	require.Equal(t, uint64(1), bp.PullBlock(uint64(1)).Header.Number)
	require.Equal(t, uint64(2), bp.PullBlock(uint64(2)).Header.Number)
	require.Equal(t, uint64(3), bp.PullBlock(uint64(3)).Header.Number)

	bp.Close()
	dialer.assertAllConnectionsClosed(t)
}

func TestBlockPullerNoOrdererAliveAtStartup(t *testing.T) {
	// Scenario: Single ordering node, and when the block puller
	// starts up - the orderer is nowhere to be found.
	osn := newClusterNode(t)
	osn.stop()
	defer osn.stop()

	dialer := newCountingDialer()
	bp := newBlockPuller(dialer, osn.srv.Address())

	// Configure the logger to wait until a failed connection attempt was made
	connectionAttemptDone := make(chan struct{})
	var closeOnce sync.Once
	bp.Logger = bp.Logger.WithOptions(zap.Hooks(func(entry zapcore.Entry) error {
		if strings.Contains(entry.Message, "Failed connecting to") {
			closeOnce.Do(func() { close(connectionAttemptDone) })
		}
		return nil
	}))

	go func() {
		select {
		case <-connectionAttemptDone:
		case <-time.After(10 * time.Second):
			t.Errorf("message was not logged before timeout")
			return
		}

		osn.resurrect()
		// The first seek request asks for the latest block
		osn.addExpectProbeAssert()
		// The first response says that the last sequence is 2
		osn.enqueueResponse(2)
		// The next seek request is for block 1
		osn.addExpectPullAssert(1)
		// And the orderer returns blocks 1 and 2
		osn.enqueueResponse(1)
		osn.enqueueResponse(2)
	}()

	require.Equal(t, uint64(1), bp.PullBlock(1).Header.Number)
	require.Equal(t, uint64(2), bp.PullBlock(2).Header.Number)

	bp.Close()
	dialer.assertAllConnectionsClosed(t)
}

func TestBlockPullerFailures(t *testing.T) {
	// This test case is flaky, so let's add some logs for the next time it fails.
	flogging.ActivateSpec("debug")
	defer flogging.ActivateSpec("info")
	testLogger := flogging.MustGetLogger("test.debug")

	// Scenario: Single ordering node is faulty, but later
	// on it recovers.
	failureError := errors.New("oops, something went wrong")
	failStream := func(osn *deliverServer, _ *cluster.BlockPuller) {
		testLogger.Info("failStream")
		osn.Lock()
		osn.err = failureError
		osn.Unlock()
	}

	badSigErr := errors.New("bad signature")
	malformBlockSignatureAndRecreateOSNBuffer := func(osn *deliverServer, bp *cluster.BlockPuller) {
		testLogger.Info("FailFunc of: failure at verifying pulled block")
		bp.VerifyBlockSequence = func(_ []*common.Block, _ string) error {
			close(osn.blocks())
			// After failing once, recover and remove the bad signature error.
			defer func() {
				// Skip recovery if we already recovered.
				if badSigErr == nil {
					return
				}
				badSigErr = nil
				osn.setBlocks(make(chan *orderer.DeliverResponse, 100))
				osn.enqueueResponse(3)
				osn.enqueueResponse(1)
				osn.enqueueResponse(2)
				osn.enqueueResponse(3)
			}()
			return badSigErr
		}
	}

	noFailFunc := func(_ *deliverServer, _ *cluster.BlockPuller) {}

	recover := func(osn *deliverServer, bp *cluster.BlockPuller) func(entry zapcore.Entry) error {
		return func(entry zapcore.Entry) error {
			if osn.isFaulty() && strings.Contains(entry.Message, failureError.Error()) {
				testLogger.Info("recover osn.err")
				osn.Lock()
				osn.err = nil
				osn.Unlock()
			}
			if strings.Contains(entry.Message, "Failed verifying") {
				testLogger.Info("recover verifier")
				bp.VerifyBlockSequence = noopBlockVerifierf
			}
			return nil
		}
	}

	failAfterConnection := func(osn *deliverServer, logTrigger string, failFunc func()) func(entry zapcore.Entry) error {
		once := &sync.Once{}
		return func(entry zapcore.Entry) error {
			if !osn.isFaulty() && strings.Contains(entry.Message, logTrigger) {
				once.Do(func() {
					failFunc()
				})
			}
			return nil
		}
	}

	for _, testCase := range []struct {
		name       string
		logTrigger string
		beforeFunc func(*deliverServer, *cluster.BlockPuller)
		failFunc   func(*deliverServer, *cluster.BlockPuller)
	}{
		{
			name:       "failure at probe",
			logTrigger: "skip this for this test case",
			beforeFunc: func(osn *deliverServer, bp *cluster.BlockPuller) {
				failStream(osn, nil)
				// The first seek request asks for the latest block but it fails
				osn.addExpectProbeAssert()
				// And the last block sequence is returned
				osn.enqueueResponse(3)
				// The next seek request is for block 1
				osn.addExpectPullAssert(1)
			},
			failFunc: noFailFunc,
		},
		{
			name:       "failure at pull",
			logTrigger: "Sending request for block [1]",
			beforeFunc: func(osn *deliverServer, bp *cluster.BlockPuller) {
				// The first seek request asks for the latest block and succeeds
				osn.addExpectProbeAssert()
				// But as the block puller tries to pull, the stream fails, so it should reconnect.
				osn.addExpectProbeAssert()
				// We need to send the latest sequence twice because the stream fails after the first time.
				osn.enqueueResponse(3)
				osn.enqueueResponse(3)
				osn.addExpectPullAssert(1)
			},
			failFunc: failStream,
		},
		{
			name:       "failure at verifying pulled block",
			logTrigger: "Sending request for block [1]",
			beforeFunc: func(osn *deliverServer, bp *cluster.BlockPuller) {
				testLogger.Infof("BeforeFunc of: %s", "failure at verifying pulled block")
				// The first seek request asks for the latest block and succeeds
				osn.addExpectProbeAssert()
				osn.enqueueResponse(3)
				// It then pulls all 3 blocks, but fails verifying them.
				osn.addExpectPullAssert(1)
				osn.enqueueResponse(1)
				osn.enqueueResponse(2)
				osn.enqueueResponse(3)
				// So it probes again, and then requests block 1 again.
				osn.addExpectProbeAssert()
				osn.addExpectPullAssert(1)
			},
			failFunc: malformBlockSignatureAndRecreateOSNBuffer,
		},
	} {
		testCase := testCase
		t.Run(testCase.name, func(t *testing.T) {
			testLogger.Infof("Starting test case: %s", testCase.name)
			osn := newClusterNode(t)
			defer osn.stop()

			dialer := newCountingDialer()
			bp := newBlockPuller(dialer, osn.srv.Address())

			testCase.beforeFunc(osn, bp)

			// Configure the logger to trigger failure at the appropriate time.
			fail := func() {
				testCase.failFunc(osn, bp)
			}
			bp.Logger = bp.Logger.WithOptions(zap.Hooks(recover(osn, bp), failAfterConnection(osn, testCase.logTrigger, fail)))

			// The orderer sends blocks 1 to 3
			osn.enqueueResponse(1)
			osn.enqueueResponse(2)
			osn.enqueueResponse(3)

			require.Equal(t, uint64(1), bp.PullBlock(uint64(1)).Header.Number)
			require.Equal(t, uint64(2), bp.PullBlock(uint64(2)).Header.Number)
			require.Equal(t, uint64(3), bp.PullBlock(uint64(3)).Header.Number)

			bp.Close()
			dialer.assertAllConnectionsClosed(t)
		})
	}
}

func TestBlockPullerBadBlocks(t *testing.T) {
	// Scenario: ordering node sends malformed blocks.

	// This test case is flaky, so let's add some logs for the next time it fails.
	flogging.ActivateSpec("debug")
	defer flogging.ActivateSpec("info")
	testLogger := flogging.MustGetLogger("test.debug")

	removeHeader := func(resp *orderer.DeliverResponse) *orderer.DeliverResponse {
		resp.GetBlock().Header = nil
		return resp
	}

	removeData := func(resp *orderer.DeliverResponse) *orderer.DeliverResponse {
		resp.GetBlock().Data = nil
		return resp
	}

	removeMetadata := func(resp *orderer.DeliverResponse) *orderer.DeliverResponse {
		resp.GetBlock().Metadata = nil
		return resp
	}

	changeType := func(resp *orderer.DeliverResponse) *orderer.DeliverResponse {
		resp.Type = nil
		return resp
	}

	statusType := func(resp *orderer.DeliverResponse) *orderer.DeliverResponse {
		resp.Type = &orderer.DeliverResponse_Status{
			Status: common.Status_INTERNAL_SERVER_ERROR,
		}
		return resp
	}

	changeSequence := func(resp *orderer.DeliverResponse) *orderer.DeliverResponse {
		resp.GetBlock().Header.Number = 3
		return resp
	}

	testcases := []struct {
		name           string
		corruptBlock   func(block *orderer.DeliverResponse) *orderer.DeliverResponse
		expectedErrMsg string
	}{
		{
			name:           "nil header",
			corruptBlock:   removeHeader,
			expectedErrMsg: "block header is nil",
		},
		{
			name:           "nil data",
			corruptBlock:   removeData,
			expectedErrMsg: "block data is nil",
		},
		{
			name:           "nil metadata",
			corruptBlock:   removeMetadata,
			expectedErrMsg: "block metadata is empty",
		},
		{
			name:           "wrong type",
			corruptBlock:   changeType,
			expectedErrMsg: "response is of type <nil>, but expected a block",
		},
		{
			name:           "bad type",
			corruptBlock:   statusType,
			expectedErrMsg: "faulty node, received: status:INTERNAL_SERVER_ERROR ",
		},
		{
			name:           "wrong number",
			corruptBlock:   changeSequence,
			expectedErrMsg: "got unexpected sequence",
		},
	}

	for _, testCase := range testcases {
		testCase := testCase
		t.Run(testCase.name, func(t *testing.T) {
			osn := newClusterNode(t)
			defer osn.stop()

			osn.addExpectProbeAssert()
			osn.addExpectPullAssert(10)

			dialer := newCountingDialer()
			bp := newBlockPuller(dialer, osn.srv.Address())

			osn.enqueueResponse(10)
			osn.enqueueResponse(10)
			// Fetch the first response
			block := <-osn.blockResponses
			// Corrupt it and re-insert it at the tail of the queue; the remaining
			// response at the head serves the probe, and the corrupted one serves the pull.
			osn.blockResponses <- testCase.corruptBlock(block)
			// [ 10 10* ]
			// Expect the block puller to retry and this time give it what it wants
			osn.addExpectProbeAssert()
			osn.addExpectPullAssert(10)
			require.Len(t, osn.seekAssertions, 4)

			// Wait until the block is pulled and the malleability is detected
			detectedBadBlockDone := make(chan struct{})
			var closeOnce sync.Once
			bp.Logger = bp.Logger.WithOptions(zap.Hooks(func(entry zapcore.Entry) error {
				if strings.Contains(entry.Message, fmt.Sprintf("Failed pulling blocks: %s", testCase.expectedErrMsg)) {
					closeOnce.Do(func() { close(detectedBadBlockDone) })

					// Close the channel to make the current server-side deliver stream close
					close(osn.blocks())
					testLogger.Infof("Expected err log has been printed: %s\n", testCase.expectedErrMsg)
					// And reset the block buffer to be able to write into it again
					osn.setBlocks(make(chan *orderer.DeliverResponse, 100))
					// Put a correct block after it, one for the probing and one for the fetch
					osn.enqueueResponse(10)
					osn.enqueueResponse(10)
				}
				return nil
			}))

			endPullBlock := make(chan struct{})
			go func() {
				bp.PullBlock(10)
				close(endPullBlock)
			}()

			select {
			case <-detectedBadBlockDone:
			case <-time.After(10 * time.Second):
				t.Fatalf("expected %q to be logged but it was not seen", testCase.expectedErrMsg)
			}

			select {
			case <-endPullBlock:
			case <-time.After(10 * time.Second):
				// Signal PullBlock to give up on the retries & mark the test as failed
				close(bp.StopChannel)
				t.Fatalf("PullBlock did not complete within time")
			}

			bp.Close()
			dialer.assertAllConnectionsClosed(t)
		})
	}
}

func TestImpatientStreamFailure(t *testing.T) {
	osn := newClusterNode(t)
	dialer := newCountingDialer()
	defer dialer.assertAllConnectionsClosed(t)
	// Wait for OSN to start
	// by trying to connect to it
	var conn *grpc.ClientConn
	var err error

	gt := gomega.NewGomegaWithT(t)
	gt.Eventually(func() (bool, error) {
		conn, err = dialer.Dial(osn.endpointCriteria())
		return true, err
	}).Should(gomega.BeTrue())
	newStream := cluster.NewImpatientStream(conn, time.Millisecond*100)
	defer conn.Close()
	// Shutdown the OSN
	osn.stop()
	// Ensure the port isn't open anymore
	gt.Eventually(func() (bool, error) {
		ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*10)
		defer cancel()
		conn, _ := grpc.DialContext(ctx, osn.srv.Address(), grpc.WithBlock(), grpc.WithInsecure())
		if conn != nil {
			conn.Close()
			return false, nil
		}
		return true, nil
	}).Should(gomega.BeTrue())
	stream, err := newStream()
	if err != nil {
		return
	}
	_, err = stream.Recv()
	require.Error(t, err)
}

func TestBlockPullerMaxRetriesExhausted(t *testing.T) {
	// Scenario:
	// The block puller is expected to pull blocks 1 to 3.
	// But the orderer only has blocks 1 and 2, and for some reason
	// it sends back block 2 twice (we do this so that we
	// don't rely on timeout, because timeouts are flaky in tests).
	// It should attempt to re-connect and to send requests
	// until the attempt number is exhausted, after which
	// it gives up, and nil is returned.

	osn := newClusterNode(t)
	defer osn.stop()

	// We report having up to block 3.
	osn.enqueueResponse(3)
	osn.addExpectProbeAssert()
	// We send block 1
	osn.addExpectPullAssert(1)
	osn.enqueueResponse(1)
	// And 2, twice.
	osn.enqueueResponse(2)
	osn.enqueueResponse(2)
	// A nil message signals that the deliver stream should close.
	// This is to signal the server side to prepare for a new deliver
	// stream that the client should open.
	osn.blockResponses <- nil

	for i := 0; i < 2; i++ {
		// Therefore, the block puller should disconnect and reconnect.
		osn.addExpectProbeAssert()
		// We report having up to block 3.
		osn.enqueueResponse(3)
		// And we expect to be asked for block 3, since blocks 1, 2
		// have already been passed to the caller.
		osn.addExpectPullAssert(3)
		// Once again, we send 2 instead of 3
		osn.enqueueResponse(2)
		// The client disconnects again
		osn.blockResponses <- nil
	}

	dialer := newCountingDialer()
	bp := newBlockPuller(dialer, osn.srv.Address())

	var exhaustedRetryAttemptsLogged bool

	bp.Logger = bp.Logger.WithOptions(zap.Hooks(func(entry zapcore.Entry) error {
		if entry.Message == "Failed pulling block [3]: retry count exhausted(2)" {
			exhaustedRetryAttemptsLogged = true
		}
		return nil
	}))

	bp.MaxPullBlockRetries = 2
	// We don't expect to timeout in this test, so make the timeout large
	// to prevent flakes due to CPU starvation.
	bp.FetchTimeout = time.Hour
	// Make the buffer tiny, only a single byte - in order to deliver blocks
	// to the caller one by one and not store them in the buffer.
	bp.MaxTotalBufferBytes = 1

	// Assert reception of blocks 1 to 3
	require.Equal(t, uint64(1), bp.PullBlock(uint64(1)).Header.Number)
	require.Equal(t, uint64(2), bp.PullBlock(uint64(2)).Header.Number)
	require.Nil(t, bp.PullBlock(uint64(3)))

	bp.Close()
	dialer.assertAllConnectionsClosed(t)
	require.True(t, exhaustedRetryAttemptsLogged)
}

func TestBlockPullerToBadEndpointWithStop(t *testing.T) {
	// Scenario:
	// The block puller is initialized with endpoints that do not exist.
	// It should attempt to re-connect but will fail, causing the PullBlock()
	// to block the calling go-routine. Closing the StopChannel should signal
	// the blocked go-routine to give up, and nil is returned.

	dialer := newCountingDialer()
	bp := newBlockPuller(dialer, "10.10.10.10:666") // Nobody is there

	var couldNotConnectLogged bool
	var receivedStopLogged bool

	bp.Logger = bp.Logger.WithOptions(zap.Hooks(func(entry zapcore.Entry) error {
		if entry.Message == "Could not connect to any endpoint of [{\"CAs\":null,\"Endpoint\":\"10.10.10.10:666\"}]" {
			couldNotConnectLogged = true
		}
		if entry.Message == "Received a stop signal" {
			receivedStopLogged = true
		}
		return nil
	}))

	// Retry for a long time
	bp.MaxPullBlockRetries = 100
	bp.RetryTimeout = time.Hour

	releaseDone := make(chan struct{})
	var closeOnce sync.Once

	go func() {
		// But this will get stuck until the StopChannel is closed
		require.Nil(t, bp.PullBlock(uint64(1)))
		bp.Close()
		closeOnce.Do(func() { close(releaseDone) })
	}()

	close(bp.StopChannel)
	select {
	case <-releaseDone:
	case <-time.After(10 * time.Second):
		t.Fatalf("timed out waiting for PullBlock to complete")
	}

	dialer.assertAllConnectionsClosed(t)
	require.True(t, couldNotConnectLogged)
	require.True(t, receivedStopLogged)
}

func TestBlockPullerToBadEndpoint(t *testing.T) {
	// Scenario: The block puller does not get stuck in an endless loop if it cannot connect to any endpoint.
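	// With MaxPullBlockRetries set to 2 below, PullBlock(1) is expected to give up
	// after two failed connection attempts and return nil.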
	dialer := newCountingDialer()
	bp := newBlockPuller(dialer, "10.10.10.10:666") // Nobody is there

	var exhaustedRetryAttemptsLogged bool
	var couldNotConnectLogged bool

	bp.Logger = bp.Logger.WithOptions(zap.Hooks(func(entry zapcore.Entry) error {
		if entry.Message == "Failed pulling block [1]: retry count exhausted(2)" {
			exhaustedRetryAttemptsLogged = true
		}
		if entry.Message == "Failed to connect to some endpoint, attempts exhausted(2), seq: 1, endpoints: [{\"CAs\":null,\"Endpoint\":\"10.10.10.10:666\"}]" {
			couldNotConnectLogged = true
		}
		return nil
	}))

	bp.MaxPullBlockRetries = 2
	// We don't expect to timeout in this test, so make the timeout large
	// to prevent flakes due to CPU starvation.
	bp.FetchTimeout = time.Hour

	require.Nil(t, bp.PullBlock(uint64(1)))

	bp.Close()
	dialer.assertAllConnectionsClosed(t)
	require.True(t, exhaustedRetryAttemptsLogged)
	require.True(t, couldNotConnectLogged)
}

func TestBlockPuller_UpdateEndpoint(t *testing.T) {
	t.Run("HeightsByEndpoints are changing", func(t *testing.T) {
		// Scenario: We ask for the latest block from a set of ordering nodes.
		// We then add an endpoint and remove an endpoint, update the puller's endpoints, and ask again.
		osn1 := newClusterNode(t)
		defer osn1.stop()
		osn2 := newClusterNode(t)
		defer osn2.stop()
		osn3 := newClusterNode(t)
		defer osn3.stop()

		dialer := newCountingDialer()
		bp := newBlockPuller(dialer, osn1.srv.Address(), osn2.srv.Address(), osn3.srv.Address())

		// The first seek request asks for the latest block from some ordering node
		osn1.addExpectProbeAssert()
		osn2.addExpectProbeAssert()
		osn3.addExpectProbeAssert()

		osn1.enqueueResponse(3)
		osn2.enqueueResponse(4)
		osn3.enqueueResponse(5)

		res, err := bp.HeightsByEndpoints()
		require.NoError(t, err)
		expected := map[string]uint64{
			osn1.srv.Address(): 4,
			osn2.srv.Address(): 5,
			osn3.srv.Address(): 6,
		}
		require.Equal(t, expected, res)

		osn4 := newClusterNode(t)
		defer osn4.stop()

		bp.UpdateEndpoints(endpointCriteriaFromEndpoints(osn2.srv.Address(), osn3.srv.Address(), osn4.srv.Address()))

		osn2.addExpectProbeAssert()
		osn3.addExpectProbeAssert()
		osn4.addExpectProbeAssert()

		osn2.enqueueResponse(44)
		osn3.enqueueResponse(55)
		osn4.enqueueResponse(66)

		res, err = bp.HeightsByEndpoints()
		require.NoError(t, err)
		expected = map[string]uint64{
			osn2.srv.Address(): 45,
			osn3.srv.Address(): 56,
			osn4.srv.Address(): 67,
		}
		require.Equal(t, expected, res)

		bp.Close()
		dialer.assertAllConnectionsClosed(t)
	})

	t.Run("switch pulling between endpoints", func(t *testing.T) {
		// Scenario:
		// The block puller is expected to pull blocks 1 to 2 from osn1.
		// After block 2 the endpoints are updated to osn2.
		// The block puller is expected to pull blocks 3 to 4 from osn2.

		osn1 := newClusterNode(t)
		defer osn1.stop()

		osn1.enqueueResponse(2)
		osn1.addExpectProbeAssert()
		// We send blocks 1 and 2
		osn1.addExpectPullAssert(1)
		osn1.enqueueResponse(1)
		osn1.enqueueResponse(2)

		dialer := newCountingDialer()
		bp := newBlockPuller(dialer, osn1.srv.Address())

		require.Equal(t, uint64(1), bp.PullBlock(uint64(1)).Header.Number)
		require.Equal(t, uint64(2), bp.PullBlock(uint64(2)).Header.Number)

		osn2 := newClusterNode(t)
		defer osn2.stop()

		osn2.enqueueResponse(4)
		osn2.addExpectProbeAssert()
		// We send blocks 3 and 4
		osn2.addExpectPullAssert(3)
		osn2.enqueueResponse(3)
		osn2.enqueueResponse(4)

		// This will disconnect
		bp.UpdateEndpoints(endpointCriteriaFromEndpoints(osn2.srv.Address()))

		require.Equal(t, uint64(3), bp.PullBlock(uint64(3)).Header.Number)
		require.Equal(t, uint64(4), bp.PullBlock(uint64(4)).Header.Number)

		bp.Close()
		dialer.assertAllConnectionsClosed(t)
	})
	t.Run("update to a bad endpoint", func(t *testing.T) {
		// Scenario:
		// The block puller is expected to pull blocks 1 to 3. The target orderer delivers only blocks 1 and 2.
		// The puller pulls blocks 1 & 2 into the buffer. After block 1 the endpoints are updated to a bad endpoint.
		// The puller gets block 2 from the buffer. It should attempt to re-connect until the attempt number is exhausted,
		// after which it gives up, and nil is returned.

		osn := newClusterNode(t)

		osn.enqueueResponse(2)
		osn.addExpectProbeAssert()
		// We send blocks 1 and 2
		osn.addExpectPullAssert(1)
		osn.enqueueResponse(1)
		osn.enqueueResponse(2)

		dialer := newCountingDialer()
		bp := newBlockPuller(dialer, osn.srv.Address())

		var exhaustedRetryAttemptsLogged bool
		var couldNotConnectLogged bool
		bp.Logger = bp.Logger.WithOptions(zap.Hooks(func(entry zapcore.Entry) error {
			if entry.Message == "Failed pulling block [3]: retry count exhausted(2)" {
				exhaustedRetryAttemptsLogged = true
			}
			if entry.Message ==
				"Failed to connect to some endpoint, attempts exhausted(2), seq: 3, endpoints: [{\"CAs\":null,\"Endpoint\":\"10.10.10.10:666\"}]" {
				couldNotConnectLogged = true
			}
			return nil
		}))

		bp.MaxPullBlockRetries = 2
		// Use a short fetch timeout, since block 3 will never arrive.
		bp.FetchTimeout = time.Second

		require.Equal(t, uint64(1), bp.PullBlock(uint64(1)).Header.Number)
		osn.stop()
		// This will disconnect
		bp.UpdateEndpoints(endpointCriteriaFromEndpoints("10.10.10.10:666"))
		// Block 2 is served from the buffer
		require.Equal(t, uint64(2), bp.PullBlock(uint64(2)).Header.Number)
		// Block 3 will never arrive
		require.Nil(t, bp.PullBlock(uint64(3)))

		bp.Close()
		dialer.assertAllConnectionsClosed(t)
		require.True(t, exhaustedRetryAttemptsLogged)
		require.True(t, couldNotConnectLogged)
	})
}