github.com/lzy4123/fabric@v2.1.1+incompatible/orderer/common/cluster/deliver_test.go

/*
Copyright IBM Corp. 2018 All Rights Reserved.

SPDX-License-Identifier: Apache-2.0
*/

package cluster_test

import (
    "context"
    "crypto/rand"
    "encoding/hex"
    "fmt"
    "net"
    "strconv"
    "strings"
    "sync"
    "sync/atomic"
    "testing"
    "time"

    "github.com/golang/protobuf/proto"
    "github.com/hyperledger/fabric-protos-go/common"
    "github.com/hyperledger/fabric-protos-go/orderer"
    "github.com/hyperledger/fabric/bccsp/factory"
    "github.com/hyperledger/fabric/common/flogging"
    "github.com/hyperledger/fabric/internal/pkg/comm"
    "github.com/hyperledger/fabric/internal/pkg/identity"
    "github.com/hyperledger/fabric/orderer/common/cluster"
    "github.com/hyperledger/fabric/orderer/common/cluster/mocks"
    "github.com/hyperledger/fabric/protoutil"
    "github.com/onsi/gomega"
    "github.com/pkg/errors"
    "github.com/stretchr/testify/assert"
    "github.com/stretchr/testify/require"
    "go.uber.org/zap"
    "go.uber.org/zap/zapcore"
    "google.golang.org/grpc"
    "google.golang.org/grpc/balancer"
    "google.golang.org/grpc/balancer/roundrobin"
)

// protects gRPC balancer registration
var gRPCBalancerLock = sync.Mutex{}

func init() {
    factory.InitFactories(nil)
}

//go:generate counterfeiter -o mocks/signer_serializer.go --fake-name SignerSerializer . signerSerializer

type signerSerializer interface {
    identity.SignerSerializer
}

type wrappedBalancer struct {
    balancer.Balancer
    balancer.V2Balancer
    cd *countingDialer
}

func (wb *wrappedBalancer) Close() {
    defer atomic.AddUint32(&wb.cd.connectionCount, ^uint32(0))
    wb.Balancer.Close()
}

type countingDialer struct {
    name            string
    baseBuilder     balancer.Builder
    connectionCount uint32
}

func newCountingDialer() *countingDialer {
    gRPCBalancerLock.Lock()
    builder := balancer.Get(roundrobin.Name)
    gRPCBalancerLock.Unlock()

    buff := make([]byte, 16)
    rand.Read(buff)
    cb := &countingDialer{
        name:        hex.EncodeToString(buff),
        baseBuilder: builder,
    }

    gRPCBalancerLock.Lock()
    balancer.Register(cb)
    gRPCBalancerLock.Unlock()

    return cb
}

func (d *countingDialer) Build(cc balancer.ClientConn, opts balancer.BuildOptions) balancer.Balancer {
    defer atomic.AddUint32(&d.connectionCount, 1)
    lb := d.baseBuilder.Build(cc, opts)
    return &wrappedBalancer{
        Balancer:   lb,
        V2Balancer: lb.(balancer.V2Balancer),
        cd:         d,
    }
}

func (d *countingDialer) Name() string {
    return d.name
}

func (d *countingDialer) assertAllConnectionsClosed(t *testing.T) {
    timeLimit := time.Now().Add(timeout)
    for atomic.LoadUint32(&d.connectionCount) != uint32(0) && time.Now().Before(timeLimit) {
        time.Sleep(time.Millisecond)
    }
    assert.Equal(t, uint32(0), atomic.LoadUint32(&d.connectionCount))
}

func (d *countingDialer) Dial(address cluster.EndpointCriteria) (*grpc.ClientConn, error) {
    ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*100)
    defer cancel()

    gRPCBalancerLock.Lock()
    balancer := grpc.WithBalancerName(d.name)
    gRPCBalancerLock.Unlock()
    return grpc.DialContext(ctx, address.Endpoint, grpc.WithBlock(), grpc.WithInsecure(), balancer)
}

func noopBlockVerifierf(_ []*common.Block, _ string) error {
    return nil
}
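// Note on the connection accounting above: Build increments connectionCount
// once per balancer (one per dialed connection), and wrappedBalancer.Close
// decrements it by adding ^uint32(0), i.e. -1 in two's complement. The counter
// therefore tracks live connections, which assertAllConnectionsClosed polls
// down to zero before failing the test.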
func readSeekEnvelope(stream orderer.AtomicBroadcast_DeliverServer) (*orderer.SeekInfo, string, error) {
    env, err := stream.Recv()
    if err != nil {
        return nil, "", err
    }
    payload, err := protoutil.UnmarshalPayload(env.Payload)
    if err != nil {
        return nil, "", err
    }
    seekInfo := &orderer.SeekInfo{}
    if err = proto.Unmarshal(payload.Data, seekInfo); err != nil {
        return nil, "", err
    }
    chdr := &common.ChannelHeader{}
    if err = proto.Unmarshal(payload.Header.ChannelHeader, chdr); err != nil {
        return nil, "", err
    }
    return seekInfo, chdr.ChannelId, nil
}

type deliverServer struct {
    t *testing.T
    sync.Mutex
    err            error
    srv            *comm.GRPCServer
    seekAssertions chan func(*orderer.SeekInfo, string)
    blockResponses chan *orderer.DeliverResponse
}

func (ds *deliverServer) endpointCriteria() cluster.EndpointCriteria {
    return cluster.EndpointCriteria{Endpoint: ds.srv.Address()}
}

func (ds *deliverServer) isFaulty() bool {
    ds.Lock()
    defer ds.Unlock()
    return ds.err != nil
}

func (*deliverServer) Broadcast(orderer.AtomicBroadcast_BroadcastServer) error {
    panic("implement me")
}

func (ds *deliverServer) Deliver(stream orderer.AtomicBroadcast_DeliverServer) error {
    ds.Lock()
    err := ds.err
    ds.Unlock()

    if err != nil {
        return err
    }
    seekInfo, channel, err := readSeekEnvelope(stream)
    if err != nil {
        panic(err)
    }

    // FAB-16233 This is meant to mitigate timeouts when
    // seekAssertions does not receive a value
    timer := time.NewTimer(1 * time.Minute)
    defer timer.Stop()

    select {
    case <-timer.C:
        panic("timed out waiting for seek assertions to receive a value")
    // Get the next seek assertion and ensure the next seek is of the expected type
    case seekAssert := <-ds.seekAssertions:
        seekAssert(seekInfo, channel)
    }

    if seekInfo.GetStart().GetSpecified() != nil {
        return ds.deliverBlocks(stream)
    }
    if seekInfo.GetStart().GetNewest() != nil {
        resp := <-ds.blocks()
        if resp == nil {
            return nil
        }
        return stream.Send(resp)
    }
    panic(fmt.Sprintf("expected either specified or newest seek but got %v", seekInfo.GetStart()))
}
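// A typical exchange with this fake orderer, as driven by the tests below
// (a sketch; the sequence numbers are arbitrary):
//
//    osn.addExpectProbeAssert() // expect a "newest" probe for the ledger height
//    osn.enqueueResponse(10)    // reply to the probe: the last sequence is 10
//    osn.addExpectPullAssert(5) // expect a "specified" seek for block 5
//    osn.enqueueResponse(5)     // then stream blocks 5, 6, ... to the client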
func (ds *deliverServer) deliverBlocks(stream orderer.AtomicBroadcast_DeliverServer) error {
    for {
        blockChan := ds.blocks()
        response := <-blockChan
        // A nil response is a signal from the test to close the stream.
        // This is needed to avoid reading from the block buffer, and hence
        // accidentally consuming a block that is slated to be pulled
        // later in the test.
        if response == nil {
            return nil
        }
        if err := stream.Send(response); err != nil {
            return err
        }
    }
}

func (ds *deliverServer) blocks() chan *orderer.DeliverResponse {
    ds.Lock()
    defer ds.Unlock()
    blockChan := ds.blockResponses
    return blockChan
}

func (ds *deliverServer) setBlocks(blocks chan *orderer.DeliverResponse) {
    ds.Lock()
    defer ds.Unlock()
    ds.blockResponses = blocks
}

func (ds *deliverServer) port() int {
    _, portStr, err := net.SplitHostPort(ds.srv.Address())
    assert.NoError(ds.t, err)

    port, err := strconv.ParseInt(portStr, 10, 32)
    assert.NoError(ds.t, err)
    return int(port)
}

func (ds *deliverServer) resurrect() {
    var err error
    // copy the responses channel into a fresh one
    respChan := make(chan *orderer.DeliverResponse, 100)
    for resp := range ds.blocks() {
        respChan <- resp
    }
    ds.blockResponses = respChan
    ds.srv.Stop()
    // And re-create the gRPC server on that port
    ds.srv, err = comm.NewGRPCServer(fmt.Sprintf("127.0.0.1:%d", ds.port()), comm.ServerConfig{})
    assert.NoError(ds.t, err)
    orderer.RegisterAtomicBroadcastServer(ds.srv.Server(), ds)
    go ds.srv.Start()
}

func (ds *deliverServer) stop() {
    ds.srv.Stop()
    close(ds.blocks())
}

func (ds *deliverServer) enqueueResponse(seq uint64) {
    ds.blocks() <- &orderer.DeliverResponse{
        Type: &orderer.DeliverResponse_Block{Block: protoutil.NewBlock(seq, nil)},
    }
}

func (ds *deliverServer) addExpectProbeAssert() {
    ds.seekAssertions <- func(info *orderer.SeekInfo, _ string) {
        assert.NotNil(ds.t, info.GetStart().GetNewest())
        assert.Equal(ds.t, info.ErrorResponse, orderer.SeekInfo_BEST_EFFORT)
    }
}

func (ds *deliverServer) addExpectPullAssert(seq uint64) {
    ds.seekAssertions <- func(info *orderer.SeekInfo, _ string) {
        seekPosition := info.GetStart()
        require.NotNil(ds.t, seekPosition)
        seekSpecified := seekPosition.GetSpecified()
        require.NotNil(ds.t, seekSpecified)
        assert.Equal(ds.t, seq, seekSpecified.Number)
        assert.Equal(ds.t, info.ErrorResponse, orderer.SeekInfo_BEST_EFFORT)
    }
}

func newClusterNode(t *testing.T) *deliverServer {
    srv, err := comm.NewGRPCServer("127.0.0.1:0", comm.ServerConfig{})
    if err != nil {
        panic(err)
    }
    ds := &deliverServer{
        t:              t,
        seekAssertions: make(chan func(*orderer.SeekInfo, string), 100),
        blockResponses: make(chan *orderer.DeliverResponse, 100),
        srv:            srv,
    }
    orderer.RegisterAtomicBroadcastServer(srv.Server(), ds)
    go srv.Start()
    return ds
}

func newBlockPuller(dialer *countingDialer, orderers ...string) *cluster.BlockPuller {
    return &cluster.BlockPuller{
        Dialer:              dialer,
        Channel:             "mychannel",
        Signer:              &mocks.SignerSerializer{},
        Endpoints:           endpointCriteriaFromEndpoints(orderers...),
        FetchTimeout:        time.Second * 10,
        MaxTotalBufferBytes: 1024 * 1024, // 1MB
        RetryTimeout:        time.Millisecond * 10,
        VerifyBlockSequence: noopBlockVerifierf,
        Logger:              flogging.MustGetLogger("test"),
    }
}

func endpointCriteriaFromEndpoints(orderers ...string) []cluster.EndpointCriteria {
    var res []cluster.EndpointCriteria
    for _, orderer := range orderers {
        res = append(res, cluster.EndpointCriteria{Endpoint: orderer})
    }
    return res
}
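// The tests below tune the defaults from newBlockPuller per scenario, e.g.:
//
//    bp := newBlockPuller(dialer, osn.srv.Address())
//    bp.FetchTimeout = time.Hour    // rely on failover logic, never on timeouts
//    bp.MaxTotalBufferBytes = 1     // hand blocks to the caller one at a time
//    bp.MaxPullBlockRetries = 2     // give up once the retry count is exhausted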
func TestBlockPullerBasicHappyPath(t *testing.T) {
    // Scenario: Single ordering node,
    // and the block puller pulls blocks 5 to 10.
    osn := newClusterNode(t)
    defer osn.stop()

    dialer := newCountingDialer()
    bp := newBlockPuller(dialer, osn.srv.Address())

    // The first seek request asks for the latest block
    osn.addExpectProbeAssert()
    // The first response says that the height is 10
    osn.enqueueResponse(10)
    // The next seek request is for block 5
    osn.addExpectPullAssert(5)
    // The later responses are the blocks themselves
    for i := 5; i <= 10; i++ {
        osn.enqueueResponse(uint64(i))
    }

    for i := 5; i <= 10; i++ {
        assert.Equal(t, uint64(i), bp.PullBlock(uint64(i)).Header.Number)
    }
    assert.Len(t, osn.blockResponses, 0)

    bp.Close()
    dialer.assertAllConnectionsClosed(t)
}

func TestBlockPullerDuplicate(t *testing.T) {
    // Scenario: The address of the ordering node
    // is found twice in the configuration, but this
    // doesn't create a problem.
    osn := newClusterNode(t)
    defer osn.stop()

    dialer := newCountingDialer()
    // Add the address twice
    bp := newBlockPuller(dialer, osn.srv.Address(), osn.srv.Address())

    // The first seek request asks for the latest block (twice)
    osn.addExpectProbeAssert()
    osn.addExpectProbeAssert()
    // The first response says that the height is 3
    osn.enqueueResponse(3)
    osn.enqueueResponse(3)
    // The next seek request is for block 1, only from some OSN
    osn.addExpectPullAssert(1)
    // The later responses are the blocks themselves
    for i := 1; i <= 3; i++ {
        osn.enqueueResponse(uint64(i))
    }

    for i := 1; i <= 3; i++ {
        assert.Equal(t, uint64(i), bp.PullBlock(uint64(i)).Header.Number)
    }
    assert.Len(t, osn.blockResponses, 0)

    bp.Close()
    dialer.assertAllConnectionsClosed(t)
}

func TestBlockPullerHeavyBlocks(t *testing.T) {
    // Scenario: Single ordering node,
    // and the block puller pulls 50 blocks, each
    // weighing 1K, but the buffer can only contain
    // 10K, so it should pull the 50 blocks in chunks of 10,
    // and verify 5 sequences at a time.

    osn := newClusterNode(t)
    defer osn.stop()
    osn.addExpectProbeAssert()
    osn.addExpectPullAssert(1)
    osn.enqueueResponse(100) // The last sequence is 100

    enqueueBlockBatch := func(start, end uint64) {
        for seq := start; seq <= end; seq++ {
            resp := &orderer.DeliverResponse{
                Type: &orderer.DeliverResponse_Block{
                    Block: protoutil.NewBlock(seq, nil),
                },
            }
            data := resp.GetBlock().Data.Data
            resp.GetBlock().Data.Data = append(data, make([]byte, 1024))
            osn.blockResponses <- resp
        }
    }

    dialer := newCountingDialer()
    bp := newBlockPuller(dialer, osn.srv.Address())
    var gotBlockMessageCount int
    bp.Logger = bp.Logger.WithOptions(zap.Hooks(func(entry zapcore.Entry) error {
        if strings.Contains(entry.Message, "Got block") {
            gotBlockMessageCount++
        }
        return nil
    }))
    bp.MaxTotalBufferBytes = 1024 * 10 // 10K

    // Enqueue only the next batch in the orderer node.
    // This ensures that only 10 blocks are fetched into the buffer
    // and not more.
    for i := uint64(0); i < 5; i++ {
        enqueueBlockBatch(i*10+uint64(1), i*10+uint64(10))
        for seq := i*10 + uint64(1); seq <= i*10+uint64(10); seq++ {
            assert.Equal(t, seq, bp.PullBlock(seq).Header.Number)
        }
    }

    assert.Equal(t, 50, gotBlockMessageCount)
    bp.Close()
    dialer.assertAllConnectionsClosed(t)
}

func TestBlockPullerClone(t *testing.T) {
    // Scenario: We have a block puller that is connected
    // to an ordering node, and we clone it.
    // We expect that the new block puller is a clean slate
    // and doesn't share any internal state with its origin.
    osn1 := newClusterNode(t)
    defer osn1.stop()

    osn1.addExpectProbeAssert()
    osn1.addExpectPullAssert(1)
    // last block sequence is 100
    osn1.enqueueResponse(100)
    osn1.enqueueResponse(1)
    // The block puller is expected to disconnect after pulling
    // a single block. So signal the server-side to avoid
    // grabbing the next block after block 1 is pulled.
    osn1.blockResponses <- nil

    dialer := newCountingDialer()
    bp := newBlockPuller(dialer, osn1.srv.Address())
    bp.FetchTimeout = time.Millisecond * 100
    // Pull a block at a time and don't buffer them
    bp.MaxTotalBufferBytes = 1
    // Clone the block puller
    bpClone := bp.Clone()
    // and override its channel
    bpClone.Channel = "foo"
    // Ensure the channel change isn't reflected in the original puller
    assert.Equal(t, "mychannel", bp.Channel)

    block := bp.PullBlock(1)
    assert.Equal(t, uint64(1), block.Header.Number)

    // After the original block puller is closed, the
    // clone should not be affected
    bp.Close()
    dialer.assertAllConnectionsClosed(t)

    // The clone block puller should not have cached the internal state
    // of its origin block puller, so it should probe again for the last block
    // sequence as if it were a new block puller.
    osn1.addExpectProbeAssert()
    osn1.addExpectPullAssert(2)
    osn1.enqueueResponse(100)
    osn1.enqueueResponse(2)

    block = bpClone.PullBlock(2)
    assert.Equal(t, uint64(2), block.Header.Number)

    bpClone.Close()
    dialer.assertAllConnectionsClosed(t)
}
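// Note the height convention in the next test: HeightsByEndpoints reports
// ledger heights (last sequence + 1), so an orderer whose newest block has
// sequence 5 shows up at height 6.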
func TestBlockPullerHeightsByEndpoints(t *testing.T) {
    // Scenario: We ask for the latest block from all the known ordering nodes.
    // One ordering node is unavailable (offline).
    // One ordering node doesn't have blocks for that channel.
    // The remaining node returns the latest block.
    osn1 := newClusterNode(t)

    osn2 := newClusterNode(t)
    defer osn2.stop()

    osn3 := newClusterNode(t)
    defer osn3.stop()

    dialer := newCountingDialer()
    bp := newBlockPuller(dialer, osn1.srv.Address(), osn2.srv.Address(), osn3.srv.Address())

    // The first seek request asks for the latest block from some ordering node
    osn1.addExpectProbeAssert()
    osn2.addExpectProbeAssert()
    osn3.addExpectProbeAssert()

    // The first ordering node is offline
    osn1.stop()
    // The second returns a bad answer
    osn2.blockResponses <- &orderer.DeliverResponse{
        Type: &orderer.DeliverResponse_Status{Status: common.Status_FORBIDDEN},
    }
    // The third returns the latest block
    osn3.enqueueResponse(5)

    res, err := bp.HeightsByEndpoints()
    assert.NoError(t, err)
    expected := map[string]uint64{
        osn3.srv.Address(): 6,
    }
    assert.Equal(t, expected, res)

    bp.Close()
    dialer.assertAllConnectionsClosed(t)
}

func TestBlockPullerMultipleOrderers(t *testing.T) {
    // Scenario: 3 ordering nodes,
    // and the block puller pulls blocks 3 to 5 from some
    // orderer node.
    // We ensure that exactly a single orderer node sent blocks.

    osn1 := newClusterNode(t)
    defer osn1.stop()

    osn2 := newClusterNode(t)
    defer osn2.stop()

    osn3 := newClusterNode(t)
    defer osn3.stop()

    dialer := newCountingDialer()
    bp := newBlockPuller(dialer, osn1.srv.Address(), osn2.srv.Address(), osn3.srv.Address())

    // The first seek request asks for the latest block from some ordering node
    osn1.addExpectProbeAssert()
    osn2.addExpectProbeAssert()
    osn3.addExpectProbeAssert()
    // The first response says that the height is 5
    osn1.enqueueResponse(5)
    osn2.enqueueResponse(5)
    osn3.enqueueResponse(5)

    // The next seek request is for block 3
    osn1.addExpectPullAssert(3)
    osn2.addExpectPullAssert(3)
    osn3.addExpectPullAssert(3)

    // The later responses are the blocks themselves
    for i := 3; i <= 5; i++ {
        osn1.enqueueResponse(uint64(i))
        osn2.enqueueResponse(uint64(i))
        osn3.enqueueResponse(uint64(i))
    }

    initialTotalBlockAmount := len(osn1.blockResponses) + len(osn2.blockResponses) + len(osn3.blockResponses)

    for i := 3; i <= 5; i++ {
        assert.Equal(t, uint64(i), bp.PullBlock(uint64(i)).Header.Number)
    }

    // Assert that the cumulative number of blocks in the OSNs went down by 6:
    // blocks 3, 4, 5 were pulled - that's 3 blocks - and block 5 was consumed
    // 3 times during the probe phase.
    finalTotalBlockAmount := len(osn1.blockResponses) + len(osn2.blockResponses) + len(osn3.blockResponses)
    assert.Equal(t, initialTotalBlockAmount-6, finalTotalBlockAmount)

    bp.Close()
    dialer.assertAllConnectionsClosed(t)
}
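// Several of the remaining tests synchronize with the puller by hooking its
// zap logger: zap.Hooks registers a callback that fires on every log entry,
// and the callback releases a WaitGroup once a known message appears.
// A minimal sketch of the pattern (the message text matches what the puller
// logs when it receives block 1):
//
//    var pulled sync.WaitGroup
//    pulled.Add(1)
//    bp.Logger = bp.Logger.WithOptions(zap.Hooks(func(e zapcore.Entry) error {
//        if strings.Contains(e.Message, "Got block [1]") {
//            pulled.Done() // in the real tests, guarded by a sync.Once
//        }
//        return nil
//    }))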
func TestBlockPullerFailover(t *testing.T) {
    // Scenario:
    // The block puller is expected to pull blocks 1 to 3.
    // There are two ordering nodes, but at first only node 1 is available.
    // The block puller first connects to it, but as the first block is pulled,
    // node 1 crashes.
    // A second orderer is then spawned and the block puller is expected to
    // connect to it and pull the rest of the blocks.

    osn1 := newClusterNode(t)
    osn1.addExpectProbeAssert()
    osn1.addExpectPullAssert(1)
    osn1.enqueueResponse(3)
    osn1.enqueueResponse(1)

    osn2 := newClusterNode(t)
    defer osn2.stop()

    osn2.addExpectProbeAssert()
    osn2.addExpectPullAssert(1)
    // First response is for the probe
    osn2.enqueueResponse(3)
    // Next three responses are for the pulling.
    osn2.enqueueResponse(1)
    osn2.enqueueResponse(2)
    osn2.enqueueResponse(3)

    // First we stop node 2 to make sure the block puller can't connect to it when it is created
    osn2.stop()

    dialer := newCountingDialer()
    bp := newBlockPuller(dialer, osn1.srv.Address(), osn2.srv.Address())

    // We don't want to rely on fetch timeout, but on pure failover logic,
    // so make the fetch timeout huge
    bp.FetchTimeout = time.Hour

    // Configure the block puller logger to signal the wait group once the block puller
    // receives the first block.
    var pulledBlock1 sync.WaitGroup
    pulledBlock1.Add(1)
    var once sync.Once
    bp.Logger = bp.Logger.WithOptions(zap.Hooks(func(entry zapcore.Entry) error {
        if strings.Contains(entry.Message, "Got block [1] of size") {
            once.Do(pulledBlock1.Done)
        }
        return nil
    }))

    go func() {
        // Wait for the block puller to pull the first block
        pulledBlock1.Wait()
        // and now crash node 1 and resurrect node 2
        osn1.stop()
        osn2.resurrect()
    }()

    // Assert reception of blocks 1 to 3
    assert.Equal(t, uint64(1), bp.PullBlock(uint64(1)).Header.Number)
    assert.Equal(t, uint64(2), bp.PullBlock(uint64(2)).Header.Number)
    assert.Equal(t, uint64(3), bp.PullBlock(uint64(3)).Header.Number)

    bp.Close()
    dialer.assertAllConnectionsClosed(t)
}

func TestBlockPullerNoneResponsiveOrderer(t *testing.T) {
    // Scenario: There are two ordering nodes, and the block puller
    // connects to one of them.
    // It fetches the first block, but then the second block isn't sent
    // for too long, and the block puller should abort and try the other
    // node.

    osn1 := newClusterNode(t)
    defer osn1.stop()

    osn2 := newClusterNode(t)
    defer osn2.stop()

    osn1.addExpectProbeAssert()
    osn2.addExpectProbeAssert()
    osn1.enqueueResponse(3)
    osn2.enqueueResponse(3)

    dialer := newCountingDialer()
    bp := newBlockPuller(dialer, osn1.srv.Address(), osn2.srv.Address())
    bp.FetchTimeout = time.Millisecond * 100

    notInUseOrdererNode := osn2
    // Configure the logger to tell us which orderer node the block puller
    // isn't connected to.
    // This is done by intercepting the appropriate message.
    var waitForConnection sync.WaitGroup
    waitForConnection.Add(1)
    var once sync.Once
    bp.Logger = bp.Logger.WithOptions(zap.Hooks(func(entry zapcore.Entry) error {
        if !strings.Contains(entry.Message, "Sending request for block [1]") {
            return nil
        }
        defer once.Do(waitForConnection.Done)
        s := entry.Message[len("Sending request for block [1] to 127.0.0.1:"):]
        port, err := strconv.ParseInt(s, 10, 32)
        assert.NoError(t, err)
        // If osn2 is the current orderer we're connected to,
        // the orderer we're not connected to is osn1
        if osn2.port() == int(port) {
            notInUseOrdererNode = osn1
            // Enqueue block 1 to the current orderer the block puller
            // is connected to
            osn2.enqueueResponse(1)
            osn2.addExpectPullAssert(1)
        } else {
            // We're connected to osn1, so enqueue block 1
            osn1.enqueueResponse(1)
            osn1.addExpectPullAssert(1)
        }
        return nil
    }))

    go func() {
        waitForConnection.Wait()
        // Enqueue the height in the orderer we're not connected to
        notInUseOrdererNode.enqueueResponse(3)
        notInUseOrdererNode.addExpectProbeAssert()
        // Enqueue blocks 1, 2, 3 to the orderer node we're not connected to.
        notInUseOrdererNode.addExpectPullAssert(1)
        notInUseOrdererNode.enqueueResponse(1)
        notInUseOrdererNode.enqueueResponse(2)
        notInUseOrdererNode.enqueueResponse(3)
    }()

    // Assert reception of blocks 1 to 3
    assert.Equal(t, uint64(1), bp.PullBlock(uint64(1)).Header.Number)
    assert.Equal(t, uint64(2), bp.PullBlock(uint64(2)).Header.Number)
    assert.Equal(t, uint64(3), bp.PullBlock(uint64(3)).Header.Number)

    bp.Close()
    dialer.assertAllConnectionsClosed(t)
}

func TestBlockPullerNoOrdererAliveAtStartup(t *testing.T) {
    // Scenario: Single ordering node, and when the block puller
    // starts up - the orderer is nowhere to be found.
    osn := newClusterNode(t)
    osn.stop()
    defer osn.stop()

    dialer := newCountingDialer()
    bp := newBlockPuller(dialer, osn.srv.Address())

    // Configure the logger to wait until a failed connection attempt is made
    var connectionAttempt sync.WaitGroup
    connectionAttempt.Add(1)
    bp.Logger = bp.Logger.WithOptions(zap.Hooks(func(entry zapcore.Entry) error {
        if strings.Contains(entry.Message, "Failed connecting to") {
            connectionAttempt.Done()
        }
        return nil
    }))

    go func() {
        connectionAttempt.Wait()
        osn.resurrect()
        // The first seek request asks for the latest block
        osn.addExpectProbeAssert()
        // The first response says that the last sequence is 2
        osn.enqueueResponse(2)
        // The next seek request is for block 1
        osn.addExpectPullAssert(1)
        // And the orderer returns blocks 1 and 2
        osn.enqueueResponse(1)
        osn.enqueueResponse(2)
    }()

    assert.Equal(t, uint64(1), bp.PullBlock(1).Header.Number)
    assert.Equal(t, uint64(2), bp.PullBlock(2).Header.Number)

    bp.Close()
    dialer.assertAllConnectionsClosed(t)
}
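// The next test is table-driven: each case arms a fault (a stream error or a
// failing block-signature verification) via its failFunc, fires it when a
// matching log message appears, and relies on the recover hook to heal the
// fault so that the pull can eventually complete.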
func TestBlockPullerFailures(t *testing.T) {
    // Scenario: Single ordering node is faulty, but later
    // on it recovers.
    failureError := errors.New("oops, something went wrong")
    failStream := func(osn *deliverServer, _ *cluster.BlockPuller) {
        osn.Lock()
        osn.err = failureError
        osn.Unlock()
    }

    badSigErr := errors.New("bad signature")
    malformBlockSignatureAndRecreateOSNBuffer := func(osn *deliverServer, bp *cluster.BlockPuller) {
        bp.VerifyBlockSequence = func(_ []*common.Block, _ string) error {
            close(osn.blocks())
            // After failing once, recover and remove the bad signature error.
            defer func() {
                // Skip recovery if we already recovered.
                if badSigErr == nil {
                    return
                }
                badSigErr = nil
                osn.setBlocks(make(chan *orderer.DeliverResponse, 100))
                osn.enqueueResponse(3)
                osn.enqueueResponse(1)
                osn.enqueueResponse(2)
                osn.enqueueResponse(3)
            }()
            return badSigErr
        }
    }

    noFailFunc := func(_ *deliverServer, _ *cluster.BlockPuller) {}

    recover := func(osn *deliverServer, bp *cluster.BlockPuller) func(entry zapcore.Entry) error {
        return func(entry zapcore.Entry) error {
            if osn.isFaulty() && strings.Contains(entry.Message, failureError.Error()) {
                osn.Lock()
                osn.err = nil
                osn.Unlock()
            }
            if strings.Contains(entry.Message, "Failed verifying") {
                bp.VerifyBlockSequence = noopBlockVerifierf
            }
            return nil
        }
    }

    failAfterConnection := func(osn *deliverServer, logTrigger string, failFunc func()) func(entry zapcore.Entry) error {
        once := &sync.Once{}
        return func(entry zapcore.Entry) error {
            if !osn.isFaulty() && strings.Contains(entry.Message, logTrigger) {
                once.Do(func() {
                    failFunc()
                })
            }
            return nil
        }
    }

    for _, testCase := range []struct {
        name       string
        logTrigger string
        beforeFunc func(*deliverServer, *cluster.BlockPuller)
        failFunc   func(*deliverServer, *cluster.BlockPuller)
    }{
        {
            name:       "failure at probe",
            logTrigger: "skip this for this test case",
            beforeFunc: func(osn *deliverServer, bp *cluster.BlockPuller) {
                failStream(osn, nil)
                // The first seek request asks for the latest block, but it fails
                osn.addExpectProbeAssert()
                // And the last block sequence is returned
                osn.enqueueResponse(3)
                // The next seek request is for block 1
                osn.addExpectPullAssert(1)
            },
            failFunc: noFailFunc,
        },
        {
            name:       "failure at pull",
            logTrigger: "Sending request for block [1]",
            beforeFunc: func(osn *deliverServer, bp *cluster.BlockPuller) {
                // The first seek request asks for the latest block and succeeds
                osn.addExpectProbeAssert()
                // But as the block puller tries to pull, the stream fails, so it should reconnect.
                osn.addExpectProbeAssert()
                // We need to send the latest sequence twice because the stream fails after the first time.
                osn.enqueueResponse(3)
                osn.enqueueResponse(3)
                osn.addExpectPullAssert(1)
            },
            failFunc: failStream,
        },
        {
            name:       "failure at verifying pulled block",
            logTrigger: "Sending request for block [1]",
            beforeFunc: func(osn *deliverServer, bp *cluster.BlockPuller) {
                // The first seek request asks for the latest block and succeeds
                osn.addExpectProbeAssert()
                osn.enqueueResponse(3)
                // It then pulls all 3 blocks, but fails verifying them.
                osn.addExpectPullAssert(1)
                osn.enqueueResponse(1)
                osn.enqueueResponse(2)
                osn.enqueueResponse(3)
                // So it probes again, and then requests block 1 again.
                osn.addExpectProbeAssert()
                osn.addExpectPullAssert(1)
            },
            failFunc: malformBlockSignatureAndRecreateOSNBuffer,
        },
    } {
        testCase := testCase
        t.Run(testCase.name, func(t *testing.T) {
            osn := newClusterNode(t)
            defer osn.stop()

            dialer := newCountingDialer()
            bp := newBlockPuller(dialer, osn.srv.Address())

            testCase.beforeFunc(osn, bp)

            // Configure the logger to trigger failure at the appropriate time.
            fail := func() {
                testCase.failFunc(osn, bp)
            }
            bp.Logger = bp.Logger.WithOptions(zap.Hooks(recover(osn, bp), failAfterConnection(osn, testCase.logTrigger, fail)))

            // The orderer sends blocks 1 to 3
            osn.enqueueResponse(1)
            osn.enqueueResponse(2)
            osn.enqueueResponse(3)

            assert.Equal(t, uint64(1), bp.PullBlock(uint64(1)).Header.Number)
            assert.Equal(t, uint64(2), bp.PullBlock(uint64(2)).Header.Number)
            assert.Equal(t, uint64(3), bp.PullBlock(uint64(3)).Header.Number)

            bp.Close()
            dialer.assertAllConnectionsClosed(t)
        })
    }
}

func TestBlockPullerBadBlocks(t *testing.T) {
    // Scenario: ordering node sends malformed blocks.

    removeHeader := func(resp *orderer.DeliverResponse) *orderer.DeliverResponse {
        resp.GetBlock().Header = nil
        return resp
    }

    removeData := func(resp *orderer.DeliverResponse) *orderer.DeliverResponse {
        resp.GetBlock().Data = nil
        return resp
    }

    removeMetadata := func(resp *orderer.DeliverResponse) *orderer.DeliverResponse {
        resp.GetBlock().Metadata = nil
        return resp
    }

    changeType := func(resp *orderer.DeliverResponse) *orderer.DeliverResponse {
        resp.Type = nil
        return resp
    }

    statusType := func(resp *orderer.DeliverResponse) *orderer.DeliverResponse {
        resp.Type = &orderer.DeliverResponse_Status{
            Status: common.Status_INTERNAL_SERVER_ERROR,
        }
        return resp
    }

    changeSequence := func(resp *orderer.DeliverResponse) *orderer.DeliverResponse {
        resp.GetBlock().Header.Number = 3
        return resp
    }

    testcases := []struct {
        name           string
        corruptBlock   func(block *orderer.DeliverResponse) *orderer.DeliverResponse
        expectedErrMsg string
    }{
        {
            name:           "nil header",
            corruptBlock:   removeHeader,
            expectedErrMsg: "block header is nil",
        },
        {
            name:           "nil data",
            corruptBlock:   removeData,
            expectedErrMsg: "block data is nil",
        },
        {
            name:           "nil metadata",
            corruptBlock:   removeMetadata,
            expectedErrMsg: "block metadata is empty",
        },
        {
            name:           "wrong type",
            corruptBlock:   changeType,
            expectedErrMsg: "response is of type <nil>, but expected a block",
        },
        {
            name:           "bad type",
            corruptBlock:   statusType,
            expectedErrMsg: "faulty node, received: status:INTERNAL_SERVER_ERROR ",
        },
        {
            name:           "wrong number",
            corruptBlock:   changeSequence,
            expectedErrMsg: "got unexpected sequence",
        },
    }

    for _, testCase := range testcases {
        testCase := testCase
        t.Run(testCase.name, func(t *testing.T) {
            osn := newClusterNode(t)
            defer osn.stop()

            osn.addExpectProbeAssert()
            osn.addExpectPullAssert(10)

            dialer := newCountingDialer()
            bp := newBlockPuller(dialer, osn.srv.Address())

            osn.enqueueResponse(10)
            osn.enqueueResponse(10)
            // Take the first response out of the queue, corrupt it, and
            // re-insert it at the tail of the queue.
            block := <-osn.blockResponses
            osn.blockResponses <- testCase.corruptBlock(block)
            // The queue is now [ 10 10* ]: the intact head serves the probe,
            // and the corrupted tail serves the pull.
            // Expect the block puller to retry, and this time give it what it wants
            osn.addExpectProbeAssert()
            osn.addExpectPullAssert(10)

            // Wait until the block is pulled and the malleability is detected
            var detectedBadBlock sync.WaitGroup
            detectedBadBlock.Add(1)
            bp.Logger = bp.Logger.WithOptions(zap.Hooks(func(entry zapcore.Entry) error {
                if strings.Contains(entry.Message, fmt.Sprintf("Failed pulling blocks: %s", testCase.expectedErrMsg)) {
                    detectedBadBlock.Done()
                    // Close the channel to make the current server-side deliver stream close
                    close(osn.blocks())
                    // And reset the block buffer to be able to write into it again
                    osn.setBlocks(make(chan *orderer.DeliverResponse, 100))
                    // Put a correct block after it, 1 for the probing and 1 for the fetch
                    osn.enqueueResponse(10)
                    osn.enqueueResponse(10)
                }
                return nil
            }))

            bp.PullBlock(10)
            detectedBadBlock.Wait()

            bp.Close()
            dialer.assertAllConnectionsClosed(t)
        })
    }
}

func TestImpatientStreamFailure(t *testing.T) {
    osn := newClusterNode(t)
    dialer := newCountingDialer()
    defer dialer.assertAllConnectionsClosed(t)
    // Wait for the OSN to start
    // by trying to connect to it
    var conn *grpc.ClientConn
    var err error

    gt := gomega.NewGomegaWithT(t)
    gt.Eventually(func() (bool, error) {
        conn, err = dialer.Dial(osn.endpointCriteria())
        return true, err
    }).Should(gomega.BeTrue())
    newStream := cluster.NewImpatientStream(conn, time.Millisecond*100)
    defer conn.Close()
    // Shutdown the OSN
    osn.stop()
    // Ensure the port isn't open anymore
    gt.Eventually(func() (bool, error) {
        ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*10)
        defer cancel()
        conn, _ := grpc.DialContext(ctx, osn.srv.Address(), grpc.WithBlock(), grpc.WithInsecure())
        if conn != nil {
            conn.Close()
            return false, nil
        }
        return true, nil
    }).Should(gomega.BeTrue())
    stream, err := newStream()
    if err != nil {
        return
    }
    _, err = stream.Recv()
    assert.Error(t, err)
}
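// Reminder of the convention used in the final test: writing nil into
// blockResponses makes the fake orderer close the current deliver stream
// (see deliverBlocks above), forcing the client to disconnect and reconnect.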
func TestBlockPullerMaxRetriesExhausted(t *testing.T) {
    // Scenario:
    // The block puller is expected to pull blocks 1 to 3,
    // but the orderer only has blocks 1 and 2, and for some reason
    // it sends back block 2 twice (we do this so that we
    // don't rely on timeouts, because timeouts are flaky in tests).
    // The block puller should attempt to re-connect and to send requests
    // until the attempt number is exhausted, after which
    // it gives up, and nil is returned.

    osn := newClusterNode(t)
    defer osn.stop()

    // We report having up to block 3.
    osn.enqueueResponse(3)
    osn.addExpectProbeAssert()
    // We send block 1
    osn.addExpectPullAssert(1)
    osn.enqueueResponse(1)
    // and block 2, twice.
    osn.enqueueResponse(2)
    osn.enqueueResponse(2)
    // A nil message signals that the deliver stream should close.
    // This is to signal the server side to prepare for a new deliver
    // stream that the client should open.
    osn.blockResponses <- nil

    for i := 0; i < 2; i++ {
        // Therefore, the block puller should disconnect and reconnect.
        osn.addExpectProbeAssert()
        // We report having up to block 3.
        osn.enqueueResponse(3)
        // And we expect to be asked for block 3, since blocks 1 and 2
        // have already been passed to the caller.
        osn.addExpectPullAssert(3)
        // Once again, we send 2 instead of 3
        osn.enqueueResponse(2)
        // The client disconnects again
        osn.blockResponses <- nil
    }

    dialer := newCountingDialer()
    bp := newBlockPuller(dialer, osn.srv.Address())

    var exhaustedRetryAttemptsLogged bool

    bp.Logger = bp.Logger.WithOptions(zap.Hooks(func(entry zapcore.Entry) error {
        if entry.Message == "Failed pulling block [3]: retry count exhausted(2)" {
            exhaustedRetryAttemptsLogged = true
        }
        return nil
    }))

    bp.MaxPullBlockRetries = 2
    // We don't expect to time out in this test, so make the timeout large
    // to prevent flakes due to CPU starvation.
    bp.FetchTimeout = time.Hour
    // Make the buffer tiny, only a single byte, in order to deliver blocks
    // to the caller one by one and not store them in the buffer.
    bp.MaxTotalBufferBytes = 1

    // Assert reception of blocks 1 and 2, and that pulling block 3 returns nil
    assert.Equal(t, uint64(1), bp.PullBlock(uint64(1)).Header.Number)
    assert.Equal(t, uint64(2), bp.PullBlock(uint64(2)).Header.Number)
    assert.Nil(t, bp.PullBlock(uint64(3)))

    bp.Close()
    dialer.assertAllConnectionsClosed(t)
    assert.True(t, exhaustedRetryAttemptsLogged)
}