github.com/kaituanwang/hyperledger@v2.0.1+incompatible/orderer/common/cluster/deliver_test.go

/*
Copyright IBM Corp. 2018 All Rights Reserved.

SPDX-License-Identifier: Apache-2.0
*/

package cluster_test

import (
	"context"
	"crypto/rand"
	"encoding/hex"
	"fmt"
	"net"
	"strconv"
	"strings"
	"sync"
	"sync/atomic"
	"testing"
	"time"

	"github.com/golang/protobuf/proto"
	"github.com/hyperledger/fabric-protos-go/common"
	"github.com/hyperledger/fabric-protos-go/orderer"
	"github.com/hyperledger/fabric/bccsp/factory"
	"github.com/hyperledger/fabric/common/flogging"
	"github.com/hyperledger/fabric/core/comm"
	"github.com/hyperledger/fabric/internal/pkg/identity"
	"github.com/hyperledger/fabric/orderer/common/cluster"
	"github.com/hyperledger/fabric/orderer/common/cluster/mocks"
	"github.com/hyperledger/fabric/protoutil"
	"github.com/onsi/gomega"
	"github.com/pkg/errors"
	"github.com/stretchr/testify/assert"
	"go.uber.org/zap"
	"go.uber.org/zap/zapcore"
	"google.golang.org/grpc"
	"google.golang.org/grpc/balancer"
	"google.golang.org/grpc/balancer/roundrobin"
)

// protects gRPC balancer registration
var gRPCBalancerLock = sync.Mutex{}

func init() {
	factory.InitFactories(nil)
}

//go:generate counterfeiter -o mocks/signer_serializer.go --fake-name SignerSerializer . signerSerializer

type signerSerializer interface {
	identity.SignerSerializer
}

type wrappedBalancer struct {
	balancer.Balancer
	balancer.V2Balancer
	cd *countingDialer
}

func (wb *wrappedBalancer) Close() {
	// Adding ^uint32(0) atomically decrements the connection count by one
	defer atomic.AddUint32(&wb.cd.connectionCount, ^uint32(0))
	wb.Balancer.Close()
}

// countingDialer dials gRPC connections and tracks how many of them are
// currently open, by registering a wrapping round-robin balancer under a
// random name.
type countingDialer struct {
	name            string
	baseBuilder     balancer.Builder
	connectionCount uint32
}

func newCountingDialer() *countingDialer {
	gRPCBalancerLock.Lock()
	builder := balancer.Get(roundrobin.Name)
	gRPCBalancerLock.Unlock()

	buff := make([]byte, 16)
	rand.Read(buff)
	cb := &countingDialer{
		name:        hex.EncodeToString(buff),
		baseBuilder: builder,
	}

	gRPCBalancerLock.Lock()
	balancer.Register(cb)
	gRPCBalancerLock.Unlock()

	return cb
}

func (d *countingDialer) Build(cc balancer.ClientConn, opts balancer.BuildOptions) balancer.Balancer {
	defer atomic.AddUint32(&d.connectionCount, 1)
	lb := d.baseBuilder.Build(cc, opts)
	return &wrappedBalancer{
		Balancer:   lb,
		V2Balancer: lb.(balancer.V2Balancer),
		cd:         d,
	}
}

func (d *countingDialer) Name() string {
	return d.name
}

func (d *countingDialer) assertAllConnectionsClosed(t *testing.T) {
	timeLimit := time.Now().Add(timeout)
	for atomic.LoadUint32(&d.connectionCount) != uint32(0) && time.Now().Before(timeLimit) {
		time.Sleep(time.Millisecond)
	}
	assert.Equal(t, uint32(0), atomic.LoadUint32(&d.connectionCount))
}

func (d *countingDialer) Dial(address cluster.EndpointCriteria) (*grpc.ClientConn, error) {
	ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*100)
	defer cancel()

	gRPCBalancerLock.Lock()
	balancer := grpc.WithBalancerName(d.name)
	gRPCBalancerLock.Unlock()
	return grpc.DialContext(ctx, address.Endpoint, grpc.WithBlock(), grpc.WithInsecure(), balancer)
}

func noopBlockVerifierf(_ []*common.Block, _ string) error {
	return nil
}

func readSeekEnvelope(stream orderer.AtomicBroadcast_DeliverServer) (*orderer.SeekInfo, string, error) {
	env, err := stream.Recv()
	if err != nil {
		return nil, "", err
	}
	payload, err := protoutil.UnmarshalPayload(env.Payload)
	if err != nil {
		return nil, "", err
	}
	seekInfo := &orderer.SeekInfo{}
	if err = proto.Unmarshal(payload.Data, seekInfo); err != nil {
		return nil, "", err
	}
	chdr := &common.ChannelHeader{}
	if err = proto.Unmarshal(payload.Header.ChannelHeader, chdr); err != nil {
		return nil, "", err
	}
	return seekInfo, chdr.ChannelId, nil
}

// deliverServer is a test double for an ordering node's deliver service:
// it asserts the seek requests it receives and replays the enqueued
// block responses back to the client.
type deliverServer struct {
	t *testing.T
	sync.Mutex
	err            error
	srv            *comm.GRPCServer
	seekAssertions chan func(*orderer.SeekInfo, string)
	blockResponses chan *orderer.DeliverResponse
}

func (ds *deliverServer) endpointCriteria() cluster.EndpointCriteria {
	return cluster.EndpointCriteria{Endpoint: ds.srv.Address()}
}

func (ds *deliverServer) isFaulty() bool {
	ds.Lock()
	defer ds.Unlock()
	return ds.err != nil
}

func (*deliverServer) Broadcast(orderer.AtomicBroadcast_BroadcastServer) error {
	panic("implement me")
}

func (ds *deliverServer) Deliver(stream orderer.AtomicBroadcast_DeliverServer) error {
	ds.Lock()
	err := ds.err
	ds.Unlock()

	if err != nil {
		return err
	}
	seekInfo, channel, err := readSeekEnvelope(stream)
	if err != nil {
		panic(err)
	}
	// Get the next seek assertion and ensure the next seek is of the expected type
	seekAssert := <-ds.seekAssertions
	seekAssert(seekInfo, channel)

	if seekInfo.GetStart().GetSpecified() != nil {
		return ds.deliverBlocks(stream)
	}
	if seekInfo.GetStart().GetNewest() != nil {
		resp := <-ds.blocks()
		if resp == nil {
			return nil
		}
		return stream.Send(resp)
	}
	panic(fmt.Sprintf("expected either specified or newest seek but got %v", seekInfo.GetStart()))
}

func (ds *deliverServer) deliverBlocks(stream orderer.AtomicBroadcast_DeliverServer) error {
	for {
		blockChan := ds.blocks()
		response := <-blockChan
		// A nil response is a signal from the test to close the stream.
		// This is needed to avoid reading from the block buffer, hence
		// consuming by accident a block that is tabled to be pulled
		// later in the test.
		if response == nil {
			return nil
		}
		if err := stream.Send(response); err != nil {
			return err
		}
	}
}

func (ds *deliverServer) blocks() chan *orderer.DeliverResponse {
	ds.Lock()
	defer ds.Unlock()
	blockChan := ds.blockResponses
	return blockChan
}

func (ds *deliverServer) setBlocks(blocks chan *orderer.DeliverResponse) {
	ds.Lock()
	defer ds.Unlock()
	ds.blockResponses = blocks
}

func (ds *deliverServer) port() int {
	_, portStr, err := net.SplitHostPort(ds.srv.Address())
	assert.NoError(ds.t, err)

	port, err := strconv.ParseInt(portStr, 10, 32)
	assert.NoError(ds.t, err)
	return int(port)
}

func (ds *deliverServer) resurrect() {
	var err error
	// copy the responses channel into a fresh one
	respChan := make(chan *orderer.DeliverResponse, 100)
	for resp := range ds.blocks() {
		respChan <- resp
	}
	ds.blockResponses = respChan
	ds.srv.Stop()
	// And re-create the gRPC server on that port
	ds.srv, err = comm.NewGRPCServer(fmt.Sprintf("127.0.0.1:%d", ds.port()), comm.ServerConfig{})
	assert.NoError(ds.t, err)
	orderer.RegisterAtomicBroadcastServer(ds.srv.Server(), ds)
	go ds.srv.Start()
}

func (ds *deliverServer) stop() {
	ds.srv.Stop()
	close(ds.blocks())
}

func (ds *deliverServer) enqueueResponse(seq uint64) {
	ds.blocks() <- &orderer.DeliverResponse{
		Type: &orderer.DeliverResponse_Block{Block: protoutil.NewBlock(seq, nil)},
	}
}

func (ds *deliverServer) addExpectProbeAssert() {
	ds.seekAssertions <- func(info *orderer.SeekInfo, _ string) {
		assert.NotNil(ds.t, info.GetStart().GetNewest())
		assert.Equal(ds.t, info.ErrorResponse, orderer.SeekInfo_BEST_EFFORT)
	}
}

func (ds *deliverServer) addExpectPullAssert(seq uint64) {
	ds.seekAssertions <- func(info *orderer.SeekInfo, _ string) {
		assert.NotNil(ds.t, info.GetStart().GetSpecified())
		assert.Equal(ds.t, seq, info.GetStart().GetSpecified().Number)
		assert.Equal(ds.t, info.ErrorResponse, orderer.SeekInfo_BEST_EFFORT)
	}
}

func newClusterNode(t *testing.T) *deliverServer {
	srv, err := comm.NewGRPCServer("127.0.0.1:0", comm.ServerConfig{})
	if err != nil {
		panic(err)
	}
	ds := &deliverServer{
		t:              t,
		seekAssertions: make(chan func(*orderer.SeekInfo, string), 100),
		blockResponses: make(chan *orderer.DeliverResponse, 100),
		srv:            srv,
	}
	orderer.RegisterAtomicBroadcastServer(srv.Server(), ds)
	go srv.Start()
	return ds
}

func newBlockPuller(dialer *countingDialer, orderers ...string) *cluster.BlockPuller {
	return &cluster.BlockPuller{
		Dialer:              dialer,
		Channel:             "mychannel",
		Signer:              &mocks.SignerSerializer{},
		Endpoints:           endpointCriteriaFromEndpoints(orderers...),
		FetchTimeout:        time.Second * 10,
		MaxTotalBufferBytes: 1024 * 1024, // 1MB
		RetryTimeout:        time.Millisecond * 10,
		VerifyBlockSequence: noopBlockVerifierf,
		Logger:              flogging.MustGetLogger("test"),
	}
}

func endpointCriteriaFromEndpoints(orderers ...string) []cluster.EndpointCriteria {
	var res []cluster.EndpointCriteria
	for _, orderer := range orderers {
		res = append(res, cluster.EndpointCriteria{Endpoint: orderer})
	}
	return res
}

func TestBlockPullerBasicHappyPath(t *testing.T) {
	// Scenario: Single ordering node,
	// and the block puller pulls blocks 5 to 10.
	osn := newClusterNode(t)
	defer osn.stop()

	dialer := newCountingDialer()
	bp := newBlockPuller(dialer, osn.srv.Address())

	// The first seek request asks for the latest block
	osn.addExpectProbeAssert()
	// The first response says that the latest sequence is 10
	osn.enqueueResponse(10)
	// The next seek request is for block 5
	osn.addExpectPullAssert(5)
	// The later responses are the blocks themselves
	for i := 5; i <= 10; i++ {
		osn.enqueueResponse(uint64(i))
	}

	for i := 5; i <= 10; i++ {
		assert.Equal(t, uint64(i), bp.PullBlock(uint64(i)).Header.Number)
	}
	assert.Len(t, osn.blockResponses, 0)

	bp.Close()
	dialer.assertAllConnectionsClosed(t)
}

func TestBlockPullerDuplicate(t *testing.T) {
	// Scenario: The address of the ordering node
	// is found twice in the configuration, but this
	// doesn't create a problem.
	osn := newClusterNode(t)
	defer osn.stop()

	dialer := newCountingDialer()
	// Add the address twice
	bp := newBlockPuller(dialer, osn.srv.Address(), osn.srv.Address())

	// The first seek request asks for the latest block (twice)
	osn.addExpectProbeAssert()
	osn.addExpectProbeAssert()
	// The first response says that the latest sequence is 3
	osn.enqueueResponse(3)
	osn.enqueueResponse(3)
	// The next seek request is for block 1, only from some OSN
	osn.addExpectPullAssert(1)
	// The later responses are the blocks themselves
	for i := 1; i <= 3; i++ {
		osn.enqueueResponse(uint64(i))
	}

	for i := 1; i <= 3; i++ {
		assert.Equal(t, uint64(i), bp.PullBlock(uint64(i)).Header.Number)
	}
	assert.Len(t, osn.blockResponses, 0)

	bp.Close()
	dialer.assertAllConnectionsClosed(t)
}

func TestBlockPullerHeavyBlocks(t *testing.T) {
	// Scenario: Single ordering node,
	// and the block puller pulls 50 blocks, each
	// weighing 1K, but the buffer can only contain
	// 10K, so it should pull the 50 blocks in chunks of 10,
	// and verify 5 sequences at a time.

	osn := newClusterNode(t)
	defer osn.stop()
	osn.addExpectProbeAssert()
	osn.addExpectPullAssert(1)
	osn.enqueueResponse(100) // The last sequence is 100

	enqueueBlockBatch := func(start, end uint64) {
		for seq := start; seq <= end; seq++ {
			resp := &orderer.DeliverResponse{
				Type: &orderer.DeliverResponse_Block{
					Block: protoutil.NewBlock(seq, nil),
				},
			}
			data := resp.GetBlock().Data.Data
			resp.GetBlock().Data.Data = append(data, make([]byte, 1024))
			osn.blockResponses <- resp
		}
	}

	dialer := newCountingDialer()
	bp := newBlockPuller(dialer, osn.srv.Address())
	var gotBlockMessageCount int
	bp.Logger = bp.Logger.WithOptions(zap.Hooks(func(entry zapcore.Entry) error {
		if strings.Contains(entry.Message, "Got block") {
			gotBlockMessageCount++
		}
		return nil
	}))
	bp.MaxTotalBufferBytes = 1024 * 10 // 10K

	// Enqueue only the next batch in the orderer node.
	// This ensures that only 10 blocks are fetched into the buffer
	// and not more.
	for i := uint64(0); i < 5; i++ {
		enqueueBlockBatch(i*10+uint64(1), i*10+uint64(10))
		for seq := i*10 + uint64(1); seq <= i*10+uint64(10); seq++ {
			assert.Equal(t, seq, bp.PullBlock(seq).Header.Number)
		}
	}

	assert.Equal(t, 50, gotBlockMessageCount)
	bp.Close()
	dialer.assertAllConnectionsClosed(t)
}

func TestBlockPullerClone(t *testing.T) {
	// Scenario: We have a block puller that is connected
	// to an ordering node, and we clone it.
	// We expect that the new block puller is a clean slate
	// and doesn't share any internal state with its origin.
	osn1 := newClusterNode(t)
	defer osn1.stop()

	osn1.addExpectProbeAssert()
	osn1.addExpectPullAssert(1)
	// last block sequence is 100
	osn1.enqueueResponse(100)
	osn1.enqueueResponse(1)
	// The block puller is expected to disconnect after pulling
	// a single block. So signal the server-side to avoid
	// grabbing the next block after block 1 is pulled.
	osn1.blockResponses <- nil

	dialer := newCountingDialer()
	bp := newBlockPuller(dialer, osn1.srv.Address())
	bp.FetchTimeout = time.Millisecond * 100
	// Pull a block at a time and don't buffer them
	bp.MaxTotalBufferBytes = 1
	// Clone the block puller
	bpClone := bp.Clone()
	// and override its channel
	bpClone.Channel = "foo"
	// Ensure the channel change doesn't reflect in the original puller
	assert.Equal(t, "mychannel", bp.Channel)

	block := bp.PullBlock(1)
	assert.Equal(t, uint64(1), block.Header.Number)

	// After the original block puller is closed, the
	// clone should not be affected
	bp.Close()
	dialer.assertAllConnectionsClosed(t)

	// The clone block puller should not have cached the internal state
	// from its origin block puller, thus it should probe again for the last block
	// sequence as if it is a new block puller.
	osn1.addExpectProbeAssert()
	osn1.addExpectPullAssert(2)
	osn1.enqueueResponse(100)
	osn1.enqueueResponse(2)

	block = bpClone.PullBlock(2)
	assert.Equal(t, uint64(2), block.Header.Number)

	bpClone.Close()
	dialer.assertAllConnectionsClosed(t)
}

func TestBlockPullerHeightsByEndpoints(t *testing.T) {
	// Scenario: We ask for the latest block from all the known ordering nodes.
	// One ordering node is unavailable (offline).
	// One ordering node doesn't have blocks for that channel.
	// The remaining node returns the latest block.
	osn1 := newClusterNode(t)

	osn2 := newClusterNode(t)
	defer osn2.stop()

	osn3 := newClusterNode(t)
	defer osn3.stop()

	dialer := newCountingDialer()
	bp := newBlockPuller(dialer, osn1.srv.Address(), osn2.srv.Address(), osn3.srv.Address())

	// The first seek request asks for the latest block from some ordering node
	osn1.addExpectProbeAssert()
	osn2.addExpectProbeAssert()
	osn3.addExpectProbeAssert()

	// The first ordering node is offline
	osn1.stop()
	// The second returns a bad answer
	osn2.blockResponses <- &orderer.DeliverResponse{
		Type: &orderer.DeliverResponse_Status{Status: common.Status_FORBIDDEN},
	}
	// The third returns the latest block
	osn3.enqueueResponse(5)

	res, err := bp.HeightsByEndpoints()
	assert.NoError(t, err)
	expected := map[string]uint64{
		osn3.srv.Address(): 6,
	}
	assert.Equal(t, expected, res)

	bp.Close()
	dialer.assertAllConnectionsClosed(t)
}

func TestBlockPullerMultipleOrderers(t *testing.T) {
	// Scenario: 3 ordering nodes,
	// and the block puller pulls blocks 3 to 5 from some
	// orderer node.
	// We ensure that exactly a single orderer node sent blocks.

	osn1 := newClusterNode(t)
	defer osn1.stop()

	osn2 := newClusterNode(t)
	defer osn2.stop()

	osn3 := newClusterNode(t)
	defer osn3.stop()

	dialer := newCountingDialer()
	bp := newBlockPuller(dialer, osn1.srv.Address(), osn2.srv.Address(), osn3.srv.Address())

	// The first seek request asks for the latest block from some ordering node
	osn1.addExpectProbeAssert()
	osn2.addExpectProbeAssert()
	osn3.addExpectProbeAssert()
	// The first response says that the latest sequence is 5
	osn1.enqueueResponse(5)
	osn2.enqueueResponse(5)
	osn3.enqueueResponse(5)

	// The next seek request is for block 3
	osn1.addExpectPullAssert(3)
	osn2.addExpectPullAssert(3)
	osn3.addExpectPullAssert(3)

	// The later responses are the blocks themselves
	for i := 3; i <= 5; i++ {
		osn1.enqueueResponse(uint64(i))
		osn2.enqueueResponse(uint64(i))
		osn3.enqueueResponse(uint64(i))
	}

	initialTotalBlockAmount := len(osn1.blockResponses) + len(osn2.blockResponses) + len(osn3.blockResponses)

	for i := 3; i <= 5; i++ {
		assert.Equal(t, uint64(i), bp.PullBlock(uint64(i)).Header.Number)
	}

	// Assert the cumulative amount of blocks in the OSNs went down by 6:
	// blocks 3, 4, 5 were pulled - that's 3 blocks.
	// block 5 was pulled 3 times at the probe phase.
	finalTotalBlockAmount := len(osn1.blockResponses) + len(osn2.blockResponses) + len(osn3.blockResponses)
	assert.Equal(t, initialTotalBlockAmount-6, finalTotalBlockAmount)

	bp.Close()
	dialer.assertAllConnectionsClosed(t)
}

func TestBlockPullerFailover(t *testing.T) {
	// Scenario:
	// The block puller is expected to pull blocks 1 to 3.
	// There are two ordering nodes, but at first - only node 1 is available.
	// The block puller first connects to it, but as it pulls the first block,
	// that node crashes.
	// A second orderer is then spawned and the block puller is expected to
	// connect to it and pull the rest of the blocks.

	osn1 := newClusterNode(t)
	osn1.addExpectProbeAssert()
	osn1.addExpectPullAssert(1)
	osn1.enqueueResponse(3)
	osn1.enqueueResponse(1)

	osn2 := newClusterNode(t)
	defer osn2.stop()

	osn2.addExpectProbeAssert()
	osn2.addExpectPullAssert(1)
	// First response is for the probe
	osn2.enqueueResponse(3)
	// Next three responses are for the pulling.
	osn2.enqueueResponse(1)
	osn2.enqueueResponse(2)
	osn2.enqueueResponse(3)

	// First we stop node 2 to make sure the block puller can't connect to it when it is created
	osn2.stop()

	dialer := newCountingDialer()
	bp := newBlockPuller(dialer, osn1.srv.Address(), osn2.srv.Address())

	// We don't want to rely on fetch timeout, but on pure failover logic,
	// so make the fetch timeout huge
	bp.FetchTimeout = time.Hour

	// Configure the block puller logger to signal the wait group once the block puller
	// received the first block.
	var pulledBlock1 sync.WaitGroup
	pulledBlock1.Add(1)
	var once sync.Once
	bp.Logger = bp.Logger.WithOptions(zap.Hooks(func(entry zapcore.Entry) error {
		if strings.Contains(entry.Message, "Got block [1] of size") {
			once.Do(pulledBlock1.Done)
		}
		return nil
	}))

	go func() {
		// Wait for the block puller to pull the first block
		pulledBlock1.Wait()
		// and now crash node 1 and resurrect node 2
		osn1.stop()
		osn2.resurrect()
	}()

	// Assert reception of blocks 1 to 3
	assert.Equal(t, uint64(1), bp.PullBlock(uint64(1)).Header.Number)
	assert.Equal(t, uint64(2), bp.PullBlock(uint64(2)).Header.Number)
	assert.Equal(t, uint64(3), bp.PullBlock(uint64(3)).Header.Number)

	bp.Close()
	dialer.assertAllConnectionsClosed(t)
}

func TestBlockPullerNoneResponsiveOrderer(t *testing.T) {
	// Scenario: There are two ordering nodes, and the block puller
	// connects to one of them.
	// It fetches the first block, but then the second block isn't sent
	// for too long, and the block puller should abort and try the other
	// node.

	osn1 := newClusterNode(t)
	defer osn1.stop()

	osn2 := newClusterNode(t)
	defer osn2.stop()

	osn1.addExpectProbeAssert()
	osn2.addExpectProbeAssert()
	osn1.enqueueResponse(3)
	osn2.enqueueResponse(3)

	dialer := newCountingDialer()
	bp := newBlockPuller(dialer, osn1.srv.Address(), osn2.srv.Address())
	bp.FetchTimeout = time.Millisecond * 100

	notInUseOrdererNode := osn2
	// Configure the logger to tell us who is the orderer node that the block puller
	// isn't connected to. This is done by intercepting the appropriate message
	var waitForConnection sync.WaitGroup
	waitForConnection.Add(1)
	var once sync.Once
	bp.Logger = bp.Logger.WithOptions(zap.Hooks(func(entry zapcore.Entry) error {
		if !strings.Contains(entry.Message, "Sending request for block [1]") {
			return nil
		}
		defer once.Do(waitForConnection.Done)
		s := entry.Message[len("Sending request for block [1] to 127.0.0.1:"):]
		port, err := strconv.ParseInt(s, 10, 32)
		assert.NoError(t, err)
		// If osn2 is the current orderer we're connected to,
		// the orderer we're not connected to is osn1
		if osn2.port() == int(port) {
			notInUseOrdererNode = osn1
			// Enqueue block 1 to the current orderer the block puller
			// is connected to
			osn2.enqueueResponse(1)
			osn2.addExpectPullAssert(1)
		} else {
			// We're connected to osn1, so enqueue block 1
			osn1.enqueueResponse(1)
			osn1.addExpectPullAssert(1)
		}
		return nil
	}))

	go func() {
		waitForConnection.Wait()
		// Enqueue the height in the orderer node we're not connected to
		notInUseOrdererNode.enqueueResponse(3)
		notInUseOrdererNode.addExpectProbeAssert()
		// Enqueue blocks 1, 2, 3 to the orderer node we're not connected to.
		notInUseOrdererNode.addExpectPullAssert(1)
		notInUseOrdererNode.enqueueResponse(1)
		notInUseOrdererNode.enqueueResponse(2)
		notInUseOrdererNode.enqueueResponse(3)
	}()

	// Assert reception of blocks 1 to 3
	assert.Equal(t, uint64(1), bp.PullBlock(uint64(1)).Header.Number)
	assert.Equal(t, uint64(2), bp.PullBlock(uint64(2)).Header.Number)
	assert.Equal(t, uint64(3), bp.PullBlock(uint64(3)).Header.Number)

	bp.Close()
	dialer.assertAllConnectionsClosed(t)
}

func TestBlockPullerNoOrdererAliveAtStartup(t *testing.T) {
	// Scenario: Single ordering node, and when the block puller
	// starts up - the orderer is nowhere to be found.
	osn := newClusterNode(t)
	osn.stop()
	defer osn.stop()

	dialer := newCountingDialer()
	bp := newBlockPuller(dialer, osn.srv.Address())

	// Configure the logger to wait until a failed connection attempt was made
	var connectionAttempt sync.WaitGroup
	connectionAttempt.Add(1)
	bp.Logger = bp.Logger.WithOptions(zap.Hooks(func(entry zapcore.Entry) error {
		if strings.Contains(entry.Message, "Failed connecting to") {
			connectionAttempt.Done()
		}
		return nil
	}))

	go func() {
		connectionAttempt.Wait()
		osn.resurrect()
		// The first seek request asks for the latest block
		osn.addExpectProbeAssert()
		// The first response says that the last sequence is 2
		osn.enqueueResponse(2)
		// The next seek request is for block 1
		osn.addExpectPullAssert(1)
		// And the orderer returns blocks 1 and 2
		osn.enqueueResponse(1)
		osn.enqueueResponse(2)
	}()

	assert.Equal(t, uint64(1), bp.PullBlock(1).Header.Number)
	assert.Equal(t, uint64(2), bp.PullBlock(2).Header.Number)

	bp.Close()
	dialer.assertAllConnectionsClosed(t)
}

func TestBlockPullerFailures(t *testing.T) {
	// Scenario: Single ordering node is faulty, but later
	// on it recovers.
	failureError := errors.New("oops, something went wrong")
	failStream := func(osn *deliverServer, _ *cluster.BlockPuller) {
		osn.Lock()
		osn.err = failureError
		osn.Unlock()
	}

	badSigErr := errors.New("bad signature")
	malformBlockSignatureAndRecreateOSNBuffer := func(osn *deliverServer, bp *cluster.BlockPuller) {
		bp.VerifyBlockSequence = func(_ []*common.Block, _ string) error {
			close(osn.blocks())
			// After failing once, recover and remove the bad signature error.
			defer func() {
				// Skip recovery if we already recovered.
				if badSigErr == nil {
					return
				}
				badSigErr = nil
				osn.setBlocks(make(chan *orderer.DeliverResponse, 100))
				osn.enqueueResponse(3)
				osn.enqueueResponse(1)
				osn.enqueueResponse(2)
				osn.enqueueResponse(3)
			}()
			return badSigErr
		}
	}

	noFailFunc := func(_ *deliverServer, _ *cluster.BlockPuller) {}

	recover := func(osn *deliverServer, bp *cluster.BlockPuller) func(entry zapcore.Entry) error {
		return func(entry zapcore.Entry) error {
			if osn.isFaulty() && strings.Contains(entry.Message, failureError.Error()) {
				osn.Lock()
				osn.err = nil
				osn.Unlock()
			}
			if strings.Contains(entry.Message, "Failed verifying") {
				bp.VerifyBlockSequence = noopBlockVerifierf
			}
			return nil
		}
	}

	failAfterConnection := func(osn *deliverServer, logTrigger string, failFunc func()) func(entry zapcore.Entry) error {
		once := &sync.Once{}
		return func(entry zapcore.Entry) error {
			if !osn.isFaulty() && strings.Contains(entry.Message, logTrigger) {
				once.Do(func() {
					failFunc()
				})
			}
			return nil
		}
	}

	for _, testCase := range []struct {
		name       string
		logTrigger string
		beforeFunc func(*deliverServer, *cluster.BlockPuller)
		failFunc   func(*deliverServer, *cluster.BlockPuller)
	}{
		{
			name:       "failure at probe",
			logTrigger: "skip this for this test case",
			beforeFunc: func(osn *deliverServer, bp *cluster.BlockPuller) {
				failStream(osn, nil)
				// The first seek request asks for the latest block but it fails
				osn.addExpectProbeAssert()
				// And the last block sequence is returned
				osn.enqueueResponse(3)
				// The next seek request is for block 1
				osn.addExpectPullAssert(1)
			},
			failFunc: noFailFunc,
		},
		{
			name:       "failure at pull",
			logTrigger: "Sending request for block [1]",
			beforeFunc: func(osn *deliverServer, bp *cluster.BlockPuller) {
				// The first seek request asks for the latest block and succeeds
				osn.addExpectProbeAssert()
				// But as the block puller tries to pull, the stream fails, so it should reconnect.
				osn.addExpectProbeAssert()
				// We need to send the latest sequence twice because the stream fails after the first time.
				osn.enqueueResponse(3)
				osn.enqueueResponse(3)
				osn.addExpectPullAssert(1)
			},
			failFunc: failStream,
		},
		{
			name:       "failure at verifying pulled block",
			logTrigger: "Sending request for block [1]",
			beforeFunc: func(osn *deliverServer, bp *cluster.BlockPuller) {
				// The first seek request asks for the latest block and succeeds
				osn.addExpectProbeAssert()
				osn.enqueueResponse(3)
				// It then pulls all 3 blocks, but fails verifying them.
				osn.addExpectPullAssert(1)
				osn.enqueueResponse(1)
				osn.enqueueResponse(2)
				osn.enqueueResponse(3)
				// So it probes again, and then requests block 1 again.
				osn.addExpectProbeAssert()
				osn.addExpectPullAssert(1)
			},
			failFunc: malformBlockSignatureAndRecreateOSNBuffer,
		},
	} {
		testCase := testCase
		t.Run(testCase.name, func(t *testing.T) {
			osn := newClusterNode(t)
			defer osn.stop()

			dialer := newCountingDialer()
			bp := newBlockPuller(dialer, osn.srv.Address())

			testCase.beforeFunc(osn, bp)

			// Configure the logger to trigger failure at the appropriate time.
			fail := func() {
				testCase.failFunc(osn, bp)
			}
			bp.Logger = bp.Logger.WithOptions(zap.Hooks(recover(osn, bp), failAfterConnection(osn, testCase.logTrigger, fail)))

			// The orderer sends blocks 1 to 3
			osn.enqueueResponse(1)
			osn.enqueueResponse(2)
			osn.enqueueResponse(3)

			assert.Equal(t, uint64(1), bp.PullBlock(uint64(1)).Header.Number)
			assert.Equal(t, uint64(2), bp.PullBlock(uint64(2)).Header.Number)
			assert.Equal(t, uint64(3), bp.PullBlock(uint64(3)).Header.Number)

			bp.Close()
			dialer.assertAllConnectionsClosed(t)
		})
	}
}

func TestBlockPullerBadBlocks(t *testing.T) {
	// Scenario: ordering node sends malformed blocks.

	removeHeader := func(resp *orderer.DeliverResponse) *orderer.DeliverResponse {
		resp.GetBlock().Header = nil
		return resp
	}

	removeData := func(resp *orderer.DeliverResponse) *orderer.DeliverResponse {
		resp.GetBlock().Data = nil
		return resp
	}

	removeMetadata := func(resp *orderer.DeliverResponse) *orderer.DeliverResponse {
		resp.GetBlock().Metadata = nil
		return resp
	}

	changeType := func(resp *orderer.DeliverResponse) *orderer.DeliverResponse {
		resp.Type = nil
		return resp
	}

	statusType := func(resp *orderer.DeliverResponse) *orderer.DeliverResponse {
		resp.Type = &orderer.DeliverResponse_Status{
			Status: common.Status_INTERNAL_SERVER_ERROR,
		}
		return resp
	}

	changeSequence := func(resp *orderer.DeliverResponse) *orderer.DeliverResponse {
		resp.GetBlock().Header.Number = 3
		return resp
	}

	testcases := []struct {
		name           string
		corruptBlock   func(block *orderer.DeliverResponse) *orderer.DeliverResponse
		expectedErrMsg string
	}{
		{
			name:           "nil header",
			corruptBlock:   removeHeader,
			expectedErrMsg: "block header is nil",
		},
		{
			name:           "nil data",
			corruptBlock:   removeData,
			expectedErrMsg: "block data is nil",
		},
		{
			name:           "nil metadata",
			corruptBlock:   removeMetadata,
			expectedErrMsg: "block metadata is empty",
		},
		{
			name:           "wrong type",
			corruptBlock:   changeType,
			expectedErrMsg: "response is of type <nil>, but expected a block",
		},
		{
			name:           "bad type",
			corruptBlock:   statusType,
			expectedErrMsg: "faulty node, received: status:INTERNAL_SERVER_ERROR ",
		},
		{
			name:           "wrong number",
			corruptBlock:   changeSequence,
			expectedErrMsg: "got unexpected sequence",
		},
	}

	for _, testCase := range testcases {
		testCase := testCase
		t.Run(testCase.name, func(t *testing.T) {

			osn := newClusterNode(t)
			defer osn.stop()

			osn.addExpectProbeAssert()
			osn.addExpectPullAssert(10)

			dialer := newCountingDialer()
			bp := newBlockPuller(dialer, osn.srv.Address())

			osn.enqueueResponse(10)
			osn.enqueueResponse(10)
			// Fetch the first response out of the queue, corrupt it, and
			// re-insert it at the tail of the queue, so the intact response
			// serves the probe and the corrupted one serves the pull
			block := <-osn.blockResponses
			osn.blockResponses <- testCase.corruptBlock(block)
			// [ 10  10* ]
			// Expect the block puller to retry and this time give it what it wants
			osn.addExpectProbeAssert()
			osn.addExpectPullAssert(10)

			// Wait until the block is pulled and the corruption is detected
			var detectedBadBlock sync.WaitGroup
			detectedBadBlock.Add(1)
			bp.Logger = bp.Logger.WithOptions(zap.Hooks(func(entry zapcore.Entry) error {
				if strings.Contains(entry.Message, fmt.Sprintf("Failed pulling blocks: %s", testCase.expectedErrMsg)) {
					detectedBadBlock.Done()
					// Close the channel to make the current server-side deliver stream close
					close(osn.blocks())
					// And reset the block buffer to be able to write into it again
					osn.setBlocks(make(chan *orderer.DeliverResponse, 100))
					// Put correct blocks after it: one for the probe and one for the fetch
					osn.enqueueResponse(10)
					osn.enqueueResponse(10)
				}
				return nil
			}))

			bp.PullBlock(10)
			detectedBadBlock.Wait()

			bp.Close()
			dialer.assertAllConnectionsClosed(t)
		})
	}
}

func TestImpatientStreamFailure(t *testing.T) {
	osn := newClusterNode(t)
	dialer := newCountingDialer()
	defer dialer.assertAllConnectionsClosed(t)
	// Wait for OSN to start
	// by trying to connect to it
	var conn *grpc.ClientConn
	var err error

	gt := gomega.NewGomegaWithT(t)
	gt.Eventually(func() (bool, error) {
		conn, err = dialer.Dial(osn.endpointCriteria())
		return true, err
	}).Should(gomega.BeTrue())
	newStream := cluster.NewImpatientStream(conn, time.Millisecond*100)
	defer conn.Close()
	// Shutdown the OSN
	osn.stop()
	// Ensure the port isn't open anymore
	gt.Eventually(func() (bool, error) {
		ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*10)
		defer cancel()
		conn, _ := grpc.DialContext(ctx, osn.srv.Address(), grpc.WithBlock(), grpc.WithInsecure())
		if conn != nil {
			conn.Close()
			return false, nil
		}
		return true, nil
	}).Should(gomega.BeTrue())
	stream, err := newStream()
	if err != nil {
		return
	}
	_, err = stream.Recv()
	assert.Error(t, err)
}

func TestBlockPullerMaxRetriesExhausted(t *testing.T) {
	// Scenario:
	// The block puller is expected to pull blocks 1 to 3.
	// But the orderer only has blocks 1 and 2, and for some reason
	// it sends back block 2 twice (we do this so that we
	// don't rely on timeout, because timeouts are flaky in tests).
	// It should attempt to re-connect and to send requests
	// until the attempt number is exhausted, after which
	// it gives up, and nil is returned.

	osn := newClusterNode(t)
	defer osn.stop()

	// We report having up to block 3.
	osn.enqueueResponse(3)
	osn.addExpectProbeAssert()
	// We send block 1
	osn.addExpectPullAssert(1)
	osn.enqueueResponse(1)
	// And 2, twice.
	osn.enqueueResponse(2)
	osn.enqueueResponse(2)
	// A nil message signals the closing of the deliver stream.
	// This is to signal the server side to prepare for a new deliver
	// stream that the client should open.
	osn.blockResponses <- nil

	for i := 0; i < 2; i++ {
		// Therefore, the block puller should disconnect and reconnect.
		osn.addExpectProbeAssert()
		// We report having up to block 3.
		osn.enqueueResponse(3)
		// And we expect to be asked for block 3, since blocks 1, 2
		// have already been passed to the caller.
		osn.addExpectPullAssert(3)
		// Once again, we send 2 instead of 3
		osn.enqueueResponse(2)
		// The client disconnects again
		osn.blockResponses <- nil
	}

	dialer := newCountingDialer()
	bp := newBlockPuller(dialer, osn.srv.Address())

	var exhaustedRetryAttemptsLogged bool

	bp.Logger = bp.Logger.WithOptions(zap.Hooks(func(entry zapcore.Entry) error {
		if entry.Message == "Failed pulling block [3]: retry count exhausted(2)" {
			exhaustedRetryAttemptsLogged = true
		}
		return nil
	}))

	bp.MaxPullBlockRetries = 2
	// We don't expect to time out in this test, so make the timeout large
	// to prevent flakes due to CPU starvation.
	bp.FetchTimeout = time.Hour
	// Make the buffer tiny, only a single byte - in order to deliver blocks
	// to the caller one by one and not store them in the buffer.
	bp.MaxTotalBufferBytes = 1

	// Assert reception of blocks 1 and 2, and that block 3 could not be pulled
	assert.Equal(t, uint64(1), bp.PullBlock(uint64(1)).Header.Number)
	assert.Equal(t, uint64(2), bp.PullBlock(uint64(2)).Header.Number)
	assert.Nil(t, bp.PullBlock(uint64(3)))

	bp.Close()
	dialer.assertAllConnectionsClosed(t)
	assert.True(t, exhaustedRetryAttemptsLogged)
}