github.com/kaituanwang/hyperledger@v2.0.1+incompatible/orderer/common/cluster/deliver_test.go

/*
Copyright IBM Corp. 2018 All Rights Reserved.

SPDX-License-Identifier: Apache-2.0
*/

package cluster_test

import (
	"context"
	"crypto/rand"
	"encoding/hex"
	"fmt"
	"net"
	"strconv"
	"strings"
	"sync"
	"sync/atomic"
	"testing"
	"time"

	"github.com/golang/protobuf/proto"
	"github.com/hyperledger/fabric-protos-go/common"
	"github.com/hyperledger/fabric-protos-go/orderer"
	"github.com/hyperledger/fabric/bccsp/factory"
	"github.com/hyperledger/fabric/common/flogging"
	"github.com/hyperledger/fabric/core/comm"
	"github.com/hyperledger/fabric/internal/pkg/identity"
	"github.com/hyperledger/fabric/orderer/common/cluster"
	"github.com/hyperledger/fabric/orderer/common/cluster/mocks"
	"github.com/hyperledger/fabric/protoutil"
	"github.com/onsi/gomega"
	"github.com/pkg/errors"
	"github.com/stretchr/testify/assert"
	"go.uber.org/zap"
	"go.uber.org/zap/zapcore"
	"google.golang.org/grpc"
	"google.golang.org/grpc/balancer"
	"google.golang.org/grpc/balancer/roundrobin"
)

// protects gRPC balancer registration
var gRPCBalancerLock = sync.Mutex{}

func init() {
	factory.InitFactories(nil)
}

//go:generate counterfeiter -o mocks/signer_serializer.go --fake-name SignerSerializer . signerSerializer

type signerSerializer interface {
	identity.SignerSerializer
}

type wrappedBalancer struct {
	balancer.Balancer
	balancer.V2Balancer
	cd *countingDialer
}

func (wb *wrappedBalancer) Close() {
	// Adding ^uint32(0) atomically decrements the connection count by one
	defer atomic.AddUint32(&wb.cd.connectionCount, ^uint32(0))
	wb.Balancer.Close()
}

// countingDialer dials gRPC connections and tracks how many of them are
// currently open, by registering a wrapping round-robin balancer under a
// random name.
type countingDialer struct {
	name            string
	baseBuilder     balancer.Builder
	connectionCount uint32
}

func newCountingDialer() *countingDialer {
	gRPCBalancerLock.Lock()
	builder := balancer.Get(roundrobin.Name)
	gRPCBalancerLock.Unlock()

	buff := make([]byte, 16)
	rand.Read(buff)
	cb := &countingDialer{
		name:        hex.EncodeToString(buff),
		baseBuilder: builder,
	}

	gRPCBalancerLock.Lock()
	balancer.Register(cb)
	gRPCBalancerLock.Unlock()

	return cb
}

func (d *countingDialer) Build(cc balancer.ClientConn, opts balancer.BuildOptions) balancer.Balancer {
	defer atomic.AddUint32(&d.connectionCount, 1)
	lb := d.baseBuilder.Build(cc, opts)
	return &wrappedBalancer{
		Balancer:   lb,
		V2Balancer: lb.(balancer.V2Balancer),
		cd:         d,
	}
}

func (d *countingDialer) Name() string {
	return d.name
}

func (d *countingDialer) assertAllConnectionsClosed(t *testing.T) {
	timeLimit := time.Now().Add(timeout)
	for atomic.LoadUint32(&d.connectionCount) != uint32(0) && time.Now().Before(timeLimit) {
		time.Sleep(time.Millisecond)
	}
	assert.Equal(t, uint32(0), atomic.LoadUint32(&d.connectionCount))
}

func (d *countingDialer) Dial(address cluster.EndpointCriteria) (*grpc.ClientConn, error) {
	ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*100)
	defer cancel()

	gRPCBalancerLock.Lock()
	balancer := grpc.WithBalancerName(d.name)
	gRPCBalancerLock.Unlock()
	return grpc.DialContext(ctx, address.Endpoint, grpc.WithBlock(), grpc.WithInsecure(), balancer)
}

func noopBlockVerifierf(_ []*common.Block, _ string) error {
	return nil
}

func readSeekEnvelope(stream orderer.AtomicBroadcast_DeliverServer) (*orderer.SeekInfo, string, error) {
	env, err := stream.Recv()
	if err != nil {
		return nil, "", err
	}
	payload, err := protoutil.UnmarshalPayload(env.Payload)
	if err != nil {
		return nil, "", err
	}
	seekInfo := &orderer.SeekInfo{}
	if err = proto.Unmarshal(payload.Data, seekInfo); err != nil {
		return nil, "", err
	}
	chdr := &common.ChannelHeader{}
	if err = proto.Unmarshal(payload.Header.ChannelHeader, chdr); err != nil {
		return nil, "", err
	}
	return seekInfo, chdr.ChannelId, nil
}

// deliverServer is a test double for an ordering node's deliver service:
// it asserts the seek requests it receives and replays the enqueued
// block responses back to the client.
type deliverServer struct {
	t *testing.T
	sync.Mutex
	err            error
	srv            *comm.GRPCServer
	seekAssertions chan func(*orderer.SeekInfo, string)
	blockResponses chan *orderer.DeliverResponse
}

func (ds *deliverServer) endpointCriteria() cluster.EndpointCriteria {
	return cluster.EndpointCriteria{Endpoint: ds.srv.Address()}
}

func (ds *deliverServer) isFaulty() bool {
	ds.Lock()
	defer ds.Unlock()
	return ds.err != nil
}

func (*deliverServer) Broadcast(orderer.AtomicBroadcast_BroadcastServer) error {
	panic("implement me")
}

func (ds *deliverServer) Deliver(stream orderer.AtomicBroadcast_DeliverServer) error {
	ds.Lock()
	err := ds.err
	ds.Unlock()

	if err != nil {
		return err
	}
	seekInfo, channel, err := readSeekEnvelope(stream)
	if err != nil {
		panic(err)
	}
	// Get the next seek assertion and ensure the next seek is of the expected type
	seekAssert := <-ds.seekAssertions
	seekAssert(seekInfo, channel)

	if seekInfo.GetStart().GetSpecified() != nil {
		return ds.deliverBlocks(stream)
	}
	if seekInfo.GetStart().GetNewest() != nil {
		resp := <-ds.blocks()
		if resp == nil {
			return nil
		}
		return stream.Send(resp)
	}
	panic(fmt.Sprintf("expected either specified or newest seek but got %v", seekInfo.GetStart()))
}

func (ds *deliverServer) deliverBlocks(stream orderer.AtomicBroadcast_DeliverServer) error {
	for {
		blockChan := ds.blocks()
		response := <-blockChan
		// A nil response is a signal from the test to close the stream.
		// This is needed to avoid reading from the block buffer, hence
		// consuming by accident a block that is tabled to be pulled
		// later in the test.
		if response == nil {
			return nil
		}
		if err := stream.Send(response); err != nil {
			return err
		}
	}
}

func (ds *deliverServer) blocks() chan *orderer.DeliverResponse {
	ds.Lock()
	defer ds.Unlock()
	blockChan := ds.blockResponses
	return blockChan
}

func (ds *deliverServer) setBlocks(blocks chan *orderer.DeliverResponse) {
	ds.Lock()
	defer ds.Unlock()
	ds.blockResponses = blocks
}

func (ds *deliverServer) port() int {
	_, portStr, err := net.SplitHostPort(ds.srv.Address())
	assert.NoError(ds.t, err)

	port, err := strconv.ParseInt(portStr, 10, 32)
	assert.NoError(ds.t, err)
	return int(port)
}

func (ds *deliverServer) resurrect() {
	var err error
	// copy the responses channel into a fresh one
	respChan := make(chan *orderer.DeliverResponse, 100)
	for resp := range ds.blocks() {
		respChan <- resp
	}
	ds.blockResponses = respChan
	ds.srv.Stop()
	// And re-create the gRPC server on that port
	ds.srv, err = comm.NewGRPCServer(fmt.Sprintf("127.0.0.1:%d", ds.port()), comm.ServerConfig{})
	assert.NoError(ds.t, err)
	orderer.RegisterAtomicBroadcastServer(ds.srv.Server(), ds)
	go ds.srv.Start()
}

func (ds *deliverServer) stop() {
	ds.srv.Stop()
	close(ds.blocks())
}

func (ds *deliverServer) enqueueResponse(seq uint64) {
	ds.blocks() <- &orderer.DeliverResponse{
		Type: &orderer.DeliverResponse_Block{Block: protoutil.NewBlock(seq, nil)},
	}
}

func (ds *deliverServer) addExpectProbeAssert() {
	ds.seekAssertions <- func(info *orderer.SeekInfo, _ string) {
		assert.NotNil(ds.t, info.GetStart().GetNewest())
		assert.Equal(ds.t, info.ErrorResponse, orderer.SeekInfo_BEST_EFFORT)
	}
}

func (ds *deliverServer) addExpectPullAssert(seq uint64) {
	ds.seekAssertions <- func(info *orderer.SeekInfo, _ string) {
		assert.NotNil(ds.t, info.GetStart().GetSpecified())
		assert.Equal(ds.t, seq, info.GetStart().GetSpecified().Number)
		assert.Equal(ds.t, info.ErrorResponse, orderer.SeekInfo_BEST_EFFORT)
	}
}

func newClusterNode(t *testing.T) *deliverServer {
	srv, err := comm.NewGRPCServer("127.0.0.1:0", comm.ServerConfig{})
	if err != nil {
		panic(err)
	}
	ds := &deliverServer{
		t:              t,
		seekAssertions: make(chan func(*orderer.SeekInfo, string), 100),
		blockResponses: make(chan *orderer.DeliverResponse, 100),
		srv:            srv,
	}
	orderer.RegisterAtomicBroadcastServer(srv.Server(), ds)
	go srv.Start()
	return ds
}

func newBlockPuller(dialer *countingDialer, orderers ...string) *cluster.BlockPuller {
	return &cluster.BlockPuller{
		Dialer:              dialer,
		Channel:             "mychannel",
		Signer:              &mocks.SignerSerializer{},
		Endpoints:           endpointCriteriaFromEndpoints(orderers...),
		FetchTimeout:        time.Second * 10,
		MaxTotalBufferBytes: 1024 * 1024, // 1MB
		RetryTimeout:        time.Millisecond * 10,
		VerifyBlockSequence: noopBlockVerifierf,
		Logger:              flogging.MustGetLogger("test"),
	}
}

func endpointCriteriaFromEndpoints(orderers ...string) []cluster.EndpointCriteria {
	var res []cluster.EndpointCriteria
	for _, orderer := range orderers {
		res = append(res, cluster.EndpointCriteria{Endpoint: orderer})
	}
	return res
}

func TestBlockPullerBasicHappyPath(t *testing.T) {
	// Scenario: Single ordering node,
	// and the block puller pulls blocks 5 to 10.
	osn := newClusterNode(t)
	defer osn.stop()

	dialer := newCountingDialer()
	bp := newBlockPuller(dialer, osn.srv.Address())

	// The first seek request asks for the latest block
	osn.addExpectProbeAssert()
	// The first response says that the latest sequence is 10
	osn.enqueueResponse(10)
	// The next seek request is for block 5
	osn.addExpectPullAssert(5)
	// The later responses are the blocks themselves
	for i := 5; i <= 10; i++ {
		osn.enqueueResponse(uint64(i))
	}

	for i := 5; i <= 10; i++ {
		assert.Equal(t, uint64(i), bp.PullBlock(uint64(i)).Header.Number)
	}
	assert.Len(t, osn.blockResponses, 0)

	bp.Close()
	dialer.assertAllConnectionsClosed(t)
}

func TestBlockPullerDuplicate(t *testing.T) {
	// Scenario: The address of the ordering node
	// is found twice in the configuration, but this
	// doesn't create a problem.
	osn := newClusterNode(t)
	defer osn.stop()

	dialer := newCountingDialer()
	// Add the address twice
	bp := newBlockPuller(dialer, osn.srv.Address(), osn.srv.Address())

	// The first seek request asks for the latest block (twice)
	osn.addExpectProbeAssert()
	osn.addExpectProbeAssert()
	// The first response says that the latest sequence is 3
	osn.enqueueResponse(3)
	osn.enqueueResponse(3)
	// The next seek request is for block 1, only from some OSN
	osn.addExpectPullAssert(1)
	// The later responses are the blocks themselves
	for i := 1; i <= 3; i++ {
		osn.enqueueResponse(uint64(i))
	}

	for i := 1; i <= 3; i++ {
		assert.Equal(t, uint64(i), bp.PullBlock(uint64(i)).Header.Number)
	}
	assert.Len(t, osn.blockResponses, 0)

	bp.Close()
	dialer.assertAllConnectionsClosed(t)
}

func TestBlockPullerHeavyBlocks(t *testing.T) {
	// Scenario: Single ordering node,
	// and the block puller pulls 50 blocks, each
	// weighing 1K, but the buffer can only contain
	// 10K, so it should pull the 50 blocks in chunks of 10,
	// and verify 5 sequences at a time.

	osn := newClusterNode(t)
	defer osn.stop()
	osn.addExpectProbeAssert()
	osn.addExpectPullAssert(1)
	osn.enqueueResponse(100) // The last sequence is 100

	enqueueBlockBatch := func(start, end uint64) {
		for seq := start; seq <= end; seq++ {
			resp := &orderer.DeliverResponse{
				Type: &orderer.DeliverResponse_Block{
					Block: protoutil.NewBlock(seq, nil),
				},
			}
			data := resp.GetBlock().Data.Data
			resp.GetBlock().Data.Data = append(data, make([]byte, 1024))
			osn.blockResponses <- resp
		}
	}

	dialer := newCountingDialer()
	bp := newBlockPuller(dialer, osn.srv.Address())
	var gotBlockMessageCount int
	bp.Logger = bp.Logger.WithOptions(zap.Hooks(func(entry zapcore.Entry) error {
		if strings.Contains(entry.Message, "Got block") {
			gotBlockMessageCount++
		}
		return nil
	}))
	bp.MaxTotalBufferBytes = 1024 * 10 // 10K

	// Enqueue only the next batch in the orderer node.
	// This ensures that only 10 blocks are fetched into the buffer
	// and not more.
	for i := uint64(0); i < 5; i++ {
		enqueueBlockBatch(i*10+uint64(1), i*10+uint64(10))
		for seq := i*10 + uint64(1); seq <= i*10+uint64(10); seq++ {
			assert.Equal(t, seq, bp.PullBlock(seq).Header.Number)
		}
	}

	assert.Equal(t, 50, gotBlockMessageCount)
	bp.Close()
	dialer.assertAllConnectionsClosed(t)
}

func TestBlockPullerClone(t *testing.T) {
	// Scenario: We have a block puller that is connected
	// to an ordering node, and we clone it.
	// We expect that the new block puller is a clean slate
	// and doesn't share any internal state with its origin.
	osn1 := newClusterNode(t)
	defer osn1.stop()

	osn1.addExpectProbeAssert()
	osn1.addExpectPullAssert(1)
	// last block sequence is 100
	osn1.enqueueResponse(100)
	osn1.enqueueResponse(1)
	// The block puller is expected to disconnect after pulling
	// a single block. So signal the server-side to avoid
	// grabbing the next block after block 1 is pulled.
	osn1.blockResponses <- nil

	dialer := newCountingDialer()
	bp := newBlockPuller(dialer, osn1.srv.Address())
	bp.FetchTimeout = time.Millisecond * 100
	// Pull a block at a time and don't buffer them
	bp.MaxTotalBufferBytes = 1
	// Clone the block puller
	bpClone := bp.Clone()
	// and override its channel
	bpClone.Channel = "foo"
	// Ensure the channel change doesn't reflect in the original puller
	assert.Equal(t, "mychannel", bp.Channel)

	block := bp.PullBlock(1)
	assert.Equal(t, uint64(1), block.Header.Number)

	// After the original block puller is closed, the
	// clone should not be affected
	bp.Close()
	dialer.assertAllConnectionsClosed(t)

	// The clone block puller should not have cached the internal state
	// from its origin block puller, thus it should probe again for the last block
	// sequence as if it is a new block puller.
	osn1.addExpectProbeAssert()
	osn1.addExpectPullAssert(2)
	osn1.enqueueResponse(100)
	osn1.enqueueResponse(2)

	block = bpClone.PullBlock(2)
	assert.Equal(t, uint64(2), block.Header.Number)

	bpClone.Close()
	dialer.assertAllConnectionsClosed(t)
}

func TestBlockPullerHeightsByEndpoints(t *testing.T) {
	// Scenario: We ask for the latest block from all the known ordering nodes.
	// One ordering node is unavailable (offline).
	// One ordering node doesn't have blocks for that channel.
	// The remaining node returns the latest block.
	osn1 := newClusterNode(t)

	osn2 := newClusterNode(t)
	defer osn2.stop()

	osn3 := newClusterNode(t)
	defer osn3.stop()

	dialer := newCountingDialer()
	bp := newBlockPuller(dialer, osn1.srv.Address(), osn2.srv.Address(), osn3.srv.Address())

	// The first seek request asks for the latest block from some ordering node
	osn1.addExpectProbeAssert()
	osn2.addExpectProbeAssert()
	osn3.addExpectProbeAssert()

	// The first ordering node is offline
	osn1.stop()
	// The second returns a bad answer
	osn2.blockResponses <- &orderer.DeliverResponse{
		Type: &orderer.DeliverResponse_Status{Status: common.Status_FORBIDDEN},
	}
	// The third returns the latest block
	osn3.enqueueResponse(5)

	res, err := bp.HeightsByEndpoints()
	assert.NoError(t, err)
	expected := map[string]uint64{
		osn3.srv.Address(): 6,
	}
	assert.Equal(t, expected, res)

	bp.Close()
	dialer.assertAllConnectionsClosed(t)
}

func TestBlockPullerMultipleOrderers(t *testing.T) {
	// Scenario: 3 ordering nodes,
	// and the block puller pulls blocks 3 to 5 from some
	// orderer node.
	// We ensure that exactly a single orderer node sent blocks.

	osn1 := newClusterNode(t)
	defer osn1.stop()

	osn2 := newClusterNode(t)
	defer osn2.stop()

	osn3 := newClusterNode(t)
	defer osn3.stop()

	dialer := newCountingDialer()
	bp := newBlockPuller(dialer, osn1.srv.Address(), osn2.srv.Address(), osn3.srv.Address())

	// The first seek request asks for the latest block from some ordering node
	osn1.addExpectProbeAssert()
	osn2.addExpectProbeAssert()
	osn3.addExpectProbeAssert()
	// The first response says that the latest sequence is 5
	osn1.enqueueResponse(5)
	osn2.enqueueResponse(5)
	osn3.enqueueResponse(5)

	// The next seek request is for block 3
	osn1.addExpectPullAssert(3)
	osn2.addExpectPullAssert(3)
	osn3.addExpectPullAssert(3)

	// The later responses are the blocks themselves
	for i := 3; i <= 5; i++ {
		osn1.enqueueResponse(uint64(i))
		osn2.enqueueResponse(uint64(i))
		osn3.enqueueResponse(uint64(i))
	}

	initialTotalBlockAmount := len(osn1.blockResponses) + len(osn2.blockResponses) + len(osn3.blockResponses)

	for i := 3; i <= 5; i++ {
		assert.Equal(t, uint64(i), bp.PullBlock(uint64(i)).Header.Number)
	}

	// Assert the cumulative amount of blocks in the OSNs went down by 6:
	// blocks 3, 4, 5 were pulled - that's 3 blocks.
	// block 5 was pulled 3 times at the probe phase.
	finalTotalBlockAmount := len(osn1.blockResponses) + len(osn2.blockResponses) + len(osn3.blockResponses)
	assert.Equal(t, initialTotalBlockAmount-6, finalTotalBlockAmount)

	bp.Close()
	dialer.assertAllConnectionsClosed(t)
}

func TestBlockPullerFailover(t *testing.T) {
	// Scenario:
	// The block puller is expected to pull blocks 1 to 3.
	// There are two ordering nodes, but at first - only node 1 is available.
	// The block puller first connects to it, but as it pulls the first block,
	// that node crashes.
	// A second orderer is then spawned and the block puller is expected to
	// connect to it and pull the rest of the blocks.

	osn1 := newClusterNode(t)
	osn1.addExpectProbeAssert()
	osn1.addExpectPullAssert(1)
	osn1.enqueueResponse(3)
	osn1.enqueueResponse(1)

	osn2 := newClusterNode(t)
	defer osn2.stop()

	osn2.addExpectProbeAssert()
	osn2.addExpectPullAssert(1)
	// First response is for the probe
	osn2.enqueueResponse(3)
	// Next three responses are for the pulling.
	osn2.enqueueResponse(1)
	osn2.enqueueResponse(2)
	osn2.enqueueResponse(3)

	// First we stop node 2 to make sure the block puller can't connect to it when it is created
	osn2.stop()

	dialer := newCountingDialer()
	bp := newBlockPuller(dialer, osn1.srv.Address(), osn2.srv.Address())

	// We don't want to rely on fetch timeout, but on pure failover logic,
	// so make the fetch timeout huge
	bp.FetchTimeout = time.Hour

	// Configure the block puller logger to signal the wait group once the block puller
	// received the first block.
	var pulledBlock1 sync.WaitGroup
	pulledBlock1.Add(1)
	var once sync.Once
	bp.Logger = bp.Logger.WithOptions(zap.Hooks(func(entry zapcore.Entry) error {
		if strings.Contains(entry.Message, "Got block [1] of size") {
			once.Do(pulledBlock1.Done)
		}
		return nil
	}))

	go func() {
		// Wait for the block puller to pull the first block
		pulledBlock1.Wait()
		// and now crash node 1 and resurrect node 2
		osn1.stop()
		osn2.resurrect()
	}()

	// Assert reception of blocks 1 to 3
	assert.Equal(t, uint64(1), bp.PullBlock(uint64(1)).Header.Number)
	assert.Equal(t, uint64(2), bp.PullBlock(uint64(2)).Header.Number)
	assert.Equal(t, uint64(3), bp.PullBlock(uint64(3)).Header.Number)

	bp.Close()
	dialer.assertAllConnectionsClosed(t)
}

func TestBlockPullerNoneResponsiveOrderer(t *testing.T) {
	// Scenario: There are two ordering nodes, and the block puller
	// connects to one of them.
	// It fetches the first block, but then the second block isn't sent
	// for too long, and the block puller should abort and try the other
	// node.

	osn1 := newClusterNode(t)
	defer osn1.stop()

	osn2 := newClusterNode(t)
	defer osn2.stop()

	osn1.addExpectProbeAssert()
	osn2.addExpectProbeAssert()
	osn1.enqueueResponse(3)
	osn2.enqueueResponse(3)

	dialer := newCountingDialer()
	bp := newBlockPuller(dialer, osn1.srv.Address(), osn2.srv.Address())
	bp.FetchTimeout = time.Millisecond * 100

	notInUseOrdererNode := osn2
	// Configure the logger to tell us who is the orderer node that the block puller
	// isn't connected to. This is done by intercepting the appropriate message
	var waitForConnection sync.WaitGroup
	waitForConnection.Add(1)
	var once sync.Once
	bp.Logger = bp.Logger.WithOptions(zap.Hooks(func(entry zapcore.Entry) error {
		if !strings.Contains(entry.Message, "Sending request for block [1]") {
			return nil
		}
		defer once.Do(waitForConnection.Done)
		s := entry.Message[len("Sending request for block [1] to 127.0.0.1:"):]
		port, err := strconv.ParseInt(s, 10, 32)
		assert.NoError(t, err)
		// If osn2 is the current orderer we're connected to,
		// the orderer we're not connected to is osn1
		if osn2.port() == int(port) {
			notInUseOrdererNode = osn1
			// Enqueue block 1 to the current orderer the block puller
			// is connected to
			osn2.enqueueResponse(1)
			osn2.addExpectPullAssert(1)
		} else {
			// We're connected to osn1, so enqueue block 1
			osn1.enqueueResponse(1)
			osn1.addExpectPullAssert(1)
		}
		return nil
	}))

	go func() {
		waitForConnection.Wait()
		// Enqueue the height in the orderer node we're not connected to
		notInUseOrdererNode.enqueueResponse(3)
		notInUseOrdererNode.addExpectProbeAssert()
		// Enqueue blocks 1, 2, 3 to the orderer node we're not connected to.
		notInUseOrdererNode.addExpectPullAssert(1)
		notInUseOrdererNode.enqueueResponse(1)
		notInUseOrdererNode.enqueueResponse(2)
		notInUseOrdererNode.enqueueResponse(3)
	}()

	// Assert reception of blocks 1 to 3
	assert.Equal(t, uint64(1), bp.PullBlock(uint64(1)).Header.Number)
	assert.Equal(t, uint64(2), bp.PullBlock(uint64(2)).Header.Number)
	assert.Equal(t, uint64(3), bp.PullBlock(uint64(3)).Header.Number)

	bp.Close()
	dialer.assertAllConnectionsClosed(t)
}

func TestBlockPullerNoOrdererAliveAtStartup(t *testing.T) {
	// Scenario: Single ordering node, and when the block puller
	// starts up - the orderer is nowhere to be found.
	osn := newClusterNode(t)
	osn.stop()
	defer osn.stop()

	dialer := newCountingDialer()
	bp := newBlockPuller(dialer, osn.srv.Address())

	// Configure the logger to wait until a failed connection attempt was made
	var connectionAttempt sync.WaitGroup
	connectionAttempt.Add(1)
	bp.Logger = bp.Logger.WithOptions(zap.Hooks(func(entry zapcore.Entry) error {
		if strings.Contains(entry.Message, "Failed connecting to") {
			connectionAttempt.Done()
		}
		return nil
	}))

	go func() {
		connectionAttempt.Wait()
		osn.resurrect()
		// The first seek request asks for the latest block
		osn.addExpectProbeAssert()
		// The first response says that the last sequence is 2
		osn.enqueueResponse(2)
		// The next seek request is for block 1
		osn.addExpectPullAssert(1)
		// And the orderer returns blocks 1 and 2
		osn.enqueueResponse(1)
		osn.enqueueResponse(2)
	}()

	assert.Equal(t, uint64(1), bp.PullBlock(1).Header.Number)
	assert.Equal(t, uint64(2), bp.PullBlock(2).Header.Number)

	bp.Close()
	dialer.assertAllConnectionsClosed(t)
}

func TestBlockPullerFailures(t *testing.T) {
	// Scenario: Single ordering node is faulty, but later
	// on it recovers.
	failureError := errors.New("oops, something went wrong")
	failStream := func(osn *deliverServer, _ *cluster.BlockPuller) {
		osn.Lock()
		osn.err = failureError
		osn.Unlock()
	}

	badSigErr := errors.New("bad signature")
	malformBlockSignatureAndRecreateOSNBuffer := func(osn *deliverServer, bp *cluster.BlockPuller) {
		bp.VerifyBlockSequence = func(_ []*common.Block, _ string) error {
			close(osn.blocks())
			// After failing once, recover and remove the bad signature error.
			defer func() {
				// Skip recovery if we already recovered.
				if badSigErr == nil {
					return
				}
				badSigErr = nil
				osn.setBlocks(make(chan *orderer.DeliverResponse, 100))
				osn.enqueueResponse(3)
				osn.enqueueResponse(1)
				osn.enqueueResponse(2)
				osn.enqueueResponse(3)
			}()
			return badSigErr
		}
	}

	noFailFunc := func(_ *deliverServer, _ *cluster.BlockPuller) {}

	recover := func(osn *deliverServer, bp *cluster.BlockPuller) func(entry zapcore.Entry) error {
		return func(entry zapcore.Entry) error {
			if osn.isFaulty() && strings.Contains(entry.Message, failureError.Error()) {
				osn.Lock()
				osn.err = nil
				osn.Unlock()
			}
			if strings.Contains(entry.Message, "Failed verifying") {
				bp.VerifyBlockSequence = noopBlockVerifierf
			}
			return nil
		}
	}

	failAfterConnection := func(osn *deliverServer, logTrigger string, failFunc func()) func(entry zapcore.Entry) error {
		once := &sync.Once{}
		return func(entry zapcore.Entry) error {
			if !osn.isFaulty() && strings.Contains(entry.Message, logTrigger) {
				once.Do(func() {
					failFunc()
				})
			}
			return nil
		}
	}

	for _, testCase := range []struct {
		name       string
		logTrigger string
		beforeFunc func(*deliverServer, *cluster.BlockPuller)
		failFunc   func(*deliverServer, *cluster.BlockPuller)
	}{
		{
			name:       "failure at probe",
			logTrigger: "skip this for this test case",
			beforeFunc: func(osn *deliverServer, bp *cluster.BlockPuller) {
				failStream(osn, nil)
				// The first seek request asks for the latest block but it fails
				osn.addExpectProbeAssert()
				// And the last block sequence is returned
				osn.enqueueResponse(3)
				// The next seek request is for block 1
				osn.addExpectPullAssert(1)
			},
			failFunc: noFailFunc,
		},
		{
			name:       "failure at pull",
			logTrigger: "Sending request for block [1]",
			beforeFunc: func(osn *deliverServer, bp *cluster.BlockPuller) {
				// The first seek request asks for the latest block and succeeds
				osn.addExpectProbeAssert()
				// But as the block puller tries to pull, the stream fails, so it should reconnect.
				osn.addExpectProbeAssert()
				// We need to send the latest sequence twice because the stream fails after the first time.
				osn.enqueueResponse(3)
				osn.enqueueResponse(3)
				osn.addExpectPullAssert(1)
			},
			failFunc: failStream,
		},
		{
			name:       "failure at verifying pulled block",
			logTrigger: "Sending request for block [1]",
			beforeFunc: func(osn *deliverServer, bp *cluster.BlockPuller) {
				// The first seek request asks for the latest block and succeeds
				osn.addExpectProbeAssert()
				osn.enqueueResponse(3)
				// It then pulls all 3 blocks, but fails verifying them.
				osn.addExpectPullAssert(1)
				osn.enqueueResponse(1)
				osn.enqueueResponse(2)
				osn.enqueueResponse(3)
				// So it probes again, and then requests block 1 again.
				osn.addExpectProbeAssert()
				osn.addExpectPullAssert(1)
			},
			failFunc: malformBlockSignatureAndRecreateOSNBuffer,
		},
	} {
		testCase := testCase
		t.Run(testCase.name, func(t *testing.T) {
			osn := newClusterNode(t)
			defer osn.stop()

			dialer := newCountingDialer()
			bp := newBlockPuller(dialer, osn.srv.Address())

			testCase.beforeFunc(osn, bp)

			// Configure the logger to trigger failure at the appropriate time.
			fail := func() {
				testCase.failFunc(osn, bp)
			}
			bp.Logger = bp.Logger.WithOptions(zap.Hooks(recover(osn, bp), failAfterConnection(osn, testCase.logTrigger, fail)))

			// The orderer sends blocks 1 to 3
			osn.enqueueResponse(1)
			osn.enqueueResponse(2)
			osn.enqueueResponse(3)

			assert.Equal(t, uint64(1), bp.PullBlock(uint64(1)).Header.Number)
			assert.Equal(t, uint64(2), bp.PullBlock(uint64(2)).Header.Number)
			assert.Equal(t, uint64(3), bp.PullBlock(uint64(3)).Header.Number)

			bp.Close()
			dialer.assertAllConnectionsClosed(t)
		})
	}
}

func TestBlockPullerBadBlocks(t *testing.T) {
	// Scenario: ordering node sends malformed blocks.

	removeHeader := func(resp *orderer.DeliverResponse) *orderer.DeliverResponse {
		resp.GetBlock().Header = nil
		return resp
	}

	removeData := func(resp *orderer.DeliverResponse) *orderer.DeliverResponse {
		resp.GetBlock().Data = nil
		return resp
	}

	removeMetadata := func(resp *orderer.DeliverResponse) *orderer.DeliverResponse {
		resp.GetBlock().Metadata = nil
		return resp
	}

	changeType := func(resp *orderer.DeliverResponse) *orderer.DeliverResponse {
		resp.Type = nil
		return resp
	}

	statusType := func(resp *orderer.DeliverResponse) *orderer.DeliverResponse {
		resp.Type = &orderer.DeliverResponse_Status{
			Status: common.Status_INTERNAL_SERVER_ERROR,
		}
		return resp
	}

	changeSequence := func(resp *orderer.DeliverResponse) *orderer.DeliverResponse {
		resp.GetBlock().Header.Number = 3
		return resp
	}

	testcases := []struct {
		name           string
		corruptBlock   func(block *orderer.DeliverResponse) *orderer.DeliverResponse
		expectedErrMsg string
	}{
		{
			name:           "nil header",
			corruptBlock:   removeHeader,
			expectedErrMsg: "block header is nil",
		},
		{
			name:           "nil data",
			corruptBlock:   removeData,
			expectedErrMsg: "block data is nil",
		},
		{
			name:           "nil metadata",
			corruptBlock:   removeMetadata,
			expectedErrMsg: "block metadata is empty",
		},
		{
			name:           "wrong type",
			corruptBlock:   changeType,
			expectedErrMsg: "response is of type <nil>, but expected a block",
		},
		{
			name:           "bad type",
			corruptBlock:   statusType,
			expectedErrMsg: "faulty node, received: status:INTERNAL_SERVER_ERROR ",
		},
		{
			name:           "wrong number",
			corruptBlock:   changeSequence,
			expectedErrMsg: "got unexpected sequence",
		},
	}

	for _, testCase := range testcases {
		testCase := testCase
		t.Run(testCase.name, func(t *testing.T) {

			osn := newClusterNode(t)
			defer osn.stop()

			osn.addExpectProbeAssert()
			osn.addExpectPullAssert(10)

			dialer := newCountingDialer()
			bp := newBlockPuller(dialer, osn.srv.Address())

			osn.enqueueResponse(10)
			osn.enqueueResponse(10)
			// Fetch the first response out of the queue, corrupt it, and
			// re-insert it at the tail of the queue, so the intact response
			// serves the probe and the corrupted one serves the pull
			block := <-osn.blockResponses
			osn.blockResponses <- testCase.corruptBlock(block)
			// [ 10  10* ]
			// Expect the block puller to retry and this time give it what it wants
			osn.addExpectProbeAssert()
			osn.addExpectPullAssert(10)

			// Wait until the block is pulled and the corruption is detected
			var detectedBadBlock sync.WaitGroup
			detectedBadBlock.Add(1)
			bp.Logger = bp.Logger.WithOptions(zap.Hooks(func(entry zapcore.Entry) error {
				if strings.Contains(entry.Message, fmt.Sprintf("Failed pulling blocks: %s", testCase.expectedErrMsg)) {
					detectedBadBlock.Done()
					// Close the channel to make the current server-side deliver stream close
					close(osn.blocks())
					// And reset the block buffer to be able to write into it again
					osn.setBlocks(make(chan *orderer.DeliverResponse, 100))
					// Put correct blocks after it: one for the probe and one for the fetch
					osn.enqueueResponse(10)
					osn.enqueueResponse(10)
				}
				return nil
			}))

			bp.PullBlock(10)
			detectedBadBlock.Wait()

			bp.Close()
			dialer.assertAllConnectionsClosed(t)
		})
	}
}

func TestImpatientStreamFailure(t *testing.T) {
	osn := newClusterNode(t)
	dialer := newCountingDialer()
	defer dialer.assertAllConnectionsClosed(t)
	// Wait for OSN to start
	// by trying to connect to it
	var conn *grpc.ClientConn
	var err error

	gt := gomega.NewGomegaWithT(t)
	gt.Eventually(func() (bool, error) {
		conn, err = dialer.Dial(osn.endpointCriteria())
		return true, err
	}).Should(gomega.BeTrue())
	newStream := cluster.NewImpatientStream(conn, time.Millisecond*100)
	defer conn.Close()
	// Shutdown the OSN
	osn.stop()
	// Ensure the port isn't open anymore
	gt.Eventually(func() (bool, error) {
		ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*10)
		defer cancel()
		conn, _ := grpc.DialContext(ctx, osn.srv.Address(), grpc.WithBlock(), grpc.WithInsecure())
		if conn != nil {
			conn.Close()
			return false, nil
		}
		return true, nil
	}).Should(gomega.BeTrue())
	stream, err := newStream()
	if err != nil {
		return
	}
	_, err = stream.Recv()
	assert.Error(t, err)
}

func TestBlockPullerMaxRetriesExhausted(t *testing.T) {
	// Scenario:
	// The block puller is expected to pull blocks 1 to 3.
	// But the orderer only has blocks 1 and 2, and for some reason
	// it sends back block 2 twice (we do this so that we
	// don't rely on timeout, because timeouts are flaky in tests).
	// It should attempt to re-connect and to send requests
	// until the attempt number is exhausted, after which
	// it gives up, and nil is returned.

	osn := newClusterNode(t)
	defer osn.stop()

	// We report having up to block 3.
	osn.enqueueResponse(3)
	osn.addExpectProbeAssert()
	// We send block 1
	osn.addExpectPullAssert(1)
	osn.enqueueResponse(1)
	// And 2, twice.
	osn.enqueueResponse(2)
	osn.enqueueResponse(2)
	// A nil message signals the closing of the deliver stream.
	// This is to signal the server side to prepare for a new deliver
	// stream that the client should open.
	osn.blockResponses <- nil

	for i := 0; i < 2; i++ {
		// Therefore, the block puller should disconnect and reconnect.
		osn.addExpectProbeAssert()
		// We report having up to block 3.
		osn.enqueueResponse(3)
		// And we expect to be asked for block 3, since blocks 1, 2
		// have already been passed to the caller.
		osn.addExpectPullAssert(3)
		// Once again, we send 2 instead of 3
		osn.enqueueResponse(2)
		// The client disconnects again
		osn.blockResponses <- nil
	}

	dialer := newCountingDialer()
	bp := newBlockPuller(dialer, osn.srv.Address())

	var exhaustedRetryAttemptsLogged bool

	bp.Logger = bp.Logger.WithOptions(zap.Hooks(func(entry zapcore.Entry) error {
		if entry.Message == "Failed pulling block [3]: retry count exhausted(2)" {
			exhaustedRetryAttemptsLogged = true
		}
		return nil
	}))

	bp.MaxPullBlockRetries = 2
	// We don't expect to time out in this test, so make the timeout large
	// to prevent flakes due to CPU starvation.
	bp.FetchTimeout = time.Hour
	// Make the buffer tiny, only a single byte - in order to deliver blocks
	// to the caller one by one and not store them in the buffer.
	bp.MaxTotalBufferBytes = 1

	// Assert reception of blocks 1 and 2, and that block 3 could not be pulled
	assert.Equal(t, uint64(1), bp.PullBlock(uint64(1)).Header.Number)
	assert.Equal(t, uint64(2), bp.PullBlock(uint64(2)).Header.Number)
	assert.Nil(t, bp.PullBlock(uint64(3)))

	bp.Close()
	dialer.assertAllConnectionsClosed(t)
	assert.True(t, exhaustedRetryAttemptsLogged)
}