github.com/adnan-c/fabric_e2e_couchdb@v0.6.1-preview.0.20170228180935-21ce6b23cf91/orderer/kafka/orderer.go

/*
Copyright IBM Corp. 2016 All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

                 http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package kafka

import (
	"time"

	"github.com/Shopify/sarama"
	"github.com/golang/protobuf/proto"
	"github.com/hyperledger/fabric/orderer/localconfig" // package name is config
	"github.com/hyperledger/fabric/orderer/multichain"
	cb "github.com/hyperledger/fabric/protos/common"
	ab "github.com/hyperledger/fabric/protos/orderer"
	"github.com/hyperledger/fabric/protos/utils"
)

// New creates a Kafka-backed consenter. Called by the orderer's main.go.
func New(kv sarama.KafkaVersion, ro config.Retry, tls config.TLS) multichain.Consenter {
	return newConsenter(kv, ro, tls, bfValue, pfValue, cfValue)
}

// New delegates to this constructor so that the broker/producer/consumer
// constructors can be passed in as additional arguments; New() itself should
// only read from the config file.
func newConsenter(kv sarama.KafkaVersion, ro config.Retry, tls config.TLS, bf bfType, pf pfType, cf cfType) multichain.Consenter {
	return &consenterImpl{kv, ro, tls, bf, pf, cf}
}

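// For reference only: in the non-test path the orderer's main is expected to
// construct the consenter with something like
//
//	kafka.New(conf.Kafka.Version, conf.Kafka.Retry, conf.Kafka.TLS)
//
// where conf is the orderer's local configuration; the field names shown here
// are illustrative.
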
// bfType defines the signature of the broker constructor.
type bfType func([]string, ChainPartition) (Broker, error)

// pfType defines the signature of the producer constructor.
type pfType func([]string, sarama.KafkaVersion, config.Retry, config.TLS) Producer

// cfType defines the signature of the consumer constructor.
type cfType func([]string, sarama.KafkaVersion, config.TLS, ChainPartition, int64) (Consumer, error)

// bfValue holds the value for the broker constructor that's used in the non-test case.
var bfValue = func(brokers []string, cp ChainPartition) (Broker, error) {
	return newBroker(brokers, cp)
}

// pfValue holds the value for the producer constructor that's used in the non-test case.
var pfValue = func(brokers []string, kafkaVersion sarama.KafkaVersion, retryOptions config.Retry, tls config.TLS) Producer {
	return newProducer(brokers, kafkaVersion, retryOptions, tls)
}

// cfValue holds the value for the consumer constructor that's used in the non-test case.
var cfValue = func(brokers []string, kafkaVersion sarama.KafkaVersion, tls config.TLS, cp ChainPartition, offset int64) (Consumer, error) {
	return newConsumer(brokers, kafkaVersion, tls, cp, offset)
}

// consenterImpl holds the implementation that satisfies the
// multichain.Consenter and testableConsenter interfaces. The former
// is needed because that is what the HandleChain contract requires.
// The latter is needed for testing.
type consenterImpl struct {
	kv  sarama.KafkaVersion
	ro  config.Retry
	tls config.TLS
	bf  bfType
	pf  pfType
	cf  cfType
}

// HandleChain creates/returns a reference to a Chain for the given set of support resources.
// Implements the multichain.Consenter interface. Called by multichain.newChainSupport(), which
// is itself called by multichain.NewManagerImpl() when ranging over the ledgerFactory's existingChains.
func (co *consenterImpl) HandleChain(cs multichain.ConsenterSupport, metadata *cb.Metadata) (multichain.Chain, error) {
	return newChain(co, cs, getLastOffsetPersisted(metadata)), nil
}

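// getLastOffsetPersisted extracts the offset of the last message that made it
// into the ledger, as recorded in the orderer metadata of the most recent
// block. For a chain with no such metadata it returns sarama.OffsetOldest-1,
// so that consumption (which starts at lastOffsetPersisted+1) begins at the
// oldest available offset.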
func getLastOffsetPersisted(metadata *cb.Metadata) int64 {
	if metadata.Value != nil {
		// Extract orderer-related metadata from the tip of the ledger first
		kafkaMetadata := &ab.KafkaMetadata{}
		if err := proto.Unmarshal(metadata.Value, kafkaMetadata); err != nil {
			panic("Ledger may be corrupted: cannot unmarshal orderer metadata in most recent block")
		}
		return kafkaMetadata.LastOffsetPersisted
	}
	return (sarama.OffsetOldest - 1) // default
}

// When testing we need to inject our own broker/producer/consumer.
// Therefore we need to (a) hold a reference to an object that stores
// the broker/producer/consumer constructors, and (b) refer to that
// object via its interface type, so that we can use a different
// implementation when testing. This, in turn, calls for (c): the
// definition of an interface (see testableConsenter below) that will
// be satisfied by both the actual and the mock object and will allow
// us to retrieve these constructors.
func newChain(consenter testableConsenter, support multichain.ConsenterSupport, lastOffsetPersisted int64) *chainImpl {
	logger.Debug("Starting chain with last persisted offset:", lastOffsetPersisted)
	return &chainImpl{
		consenter:           consenter,
		support:             support,
		partition:           newChainPartition(support.ChainID(), rawPartition),
		batchTimeout:        support.SharedConfig().BatchTimeout(),
		lastOffsetPersisted: lastOffsetPersisted,
		producer:            consenter.prodFunc()(support.SharedConfig().KafkaBrokers(), consenter.kafkaVersion(), consenter.retryOptions(), consenter.tlsConfig()),
		halted:              false, // Redundant as the default value for booleans is false, but added for readability
		exitChan:            make(chan struct{}),
		haltedChan:          make(chan struct{}),
		setupChan:           make(chan struct{}),
	}
}

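// As an illustration only (the actual mocks live in this package's test
// files): a test can satisfy the injection described above by passing stub
// constructors to newConsenter, e.g.
//
//	bf := func([]string, ChainPartition) (Broker, error) { return mockBroker, nil }
//	pf := func([]string, sarama.KafkaVersion, config.Retry, config.TLS) Producer { return mockProducer }
//	cf := func([]string, sarama.KafkaVersion, config.TLS, ChainPartition, int64) (Consumer, error) { return mockConsumer, nil }
//	co := newConsenter(kv, ro, tls, bf, pf, cf)
//
// where mockBroker, mockProducer, and mockConsumer are hypothetical test doubles.
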
// Satisfied by both consenterImpl and mockConsenterImpl.
// Defined so as to facilitate testing.
type testableConsenter interface {
	kafkaVersion() sarama.KafkaVersion
	retryOptions() config.Retry
	tlsConfig() config.TLS
	brokFunc() bfType
	prodFunc() pfType
	consFunc() cfType
}

func (co *consenterImpl) kafkaVersion() sarama.KafkaVersion { return co.kv }
func (co *consenterImpl) retryOptions() config.Retry        { return co.ro }
func (co *consenterImpl) tlsConfig() config.TLS             { return co.tls }
func (co *consenterImpl) brokFunc() bfType                  { return co.bf }
func (co *consenterImpl) prodFunc() pfType                  { return co.pf }
func (co *consenterImpl) consFunc() cfType                  { return co.cf }

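// chainImpl is the Kafka-backed implementation of multichain.Chain; it maps a
// single chain to a Kafka partition (see ChainPartition) and carries the
// producer and consumer used to post to and read from that partition.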
type chainImpl struct {
	consenter testableConsenter
	support   multichain.ConsenterSupport

	partition           ChainPartition
	batchTimeout        time.Duration
	lastOffsetPersisted int64
	lastCutBlock        uint64

	producer Producer
	consumer Consumer

	halted   bool          // For the Enqueue() calls
	exitChan chan struct{} // For the Chain's Halt() method

	// Hooks for testing
	haltedChan chan struct{}
	setupChan  chan struct{}
}

// Start allocates the necessary resources for staying up to date with this Chain.
// Implements the multichain.Chain interface. Called by multichain.NewManagerImpl(),
// which is invoked when the ordering process is launched, before the call to NewServer().
func (ch *chainImpl) Start() {
	// 1. Post the CONNECT message to prevent the panic that occurs
	// when seeking on a partition that hasn't been created yet.
	logger.Debug("Posting the CONNECT message...")
	if err := ch.producer.Send(ch.partition, utils.MarshalOrPanic(newConnectMessage())); err != nil {
		logger.Criticalf("Couldn't post CONNECT message to %s: %s", ch.partition, err)
		close(ch.exitChan)
		ch.halted = true
		return
	}

	// 2. Set up the listener/consumer for this partition.
	consumer, err := ch.consenter.consFunc()(ch.support.SharedConfig().KafkaBrokers(), ch.consenter.kafkaVersion(), ch.consenter.tlsConfig(), ch.partition, ch.lastOffsetPersisted+1)
	if err != nil {
		logger.Criticalf("Cannot retrieve required offset from Kafka cluster for chain %s: %s", ch.partition, err)
		close(ch.exitChan)
		ch.halted = true
		return
	}
	ch.consumer = consumer
	close(ch.setupChan)
	go ch.listenForErrors()

	// 3. Start the loop that keeps the chain up to date.
	go ch.loop()
}

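// listenForErrors logs the first error reported by the consumer, unless the
// chain is halted (exitChan closed) first.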
func (ch *chainImpl) listenForErrors() {
	select {
	case <-ch.exitChan:
		return
	case err := <-ch.consumer.Errors():
		logger.Error(err)
	}
}

// Halt frees the resources which were allocated for this Chain.
// Implements the multichain.Chain interface.
func (ch *chainImpl) Halt() {
	select {
	case <-ch.exitChan:
		// This construct is useful because it allows Halt() to be
		// called multiple times without panicking. Recall that a receive
		// from a closed channel returns (the zero value) immediately.
	default:
		close(ch.exitChan)
	}
}

// Enqueue accepts a message and returns true on acceptance, or false on shutdown.
// Implements the multichain.Chain interface. Called by the drainQueue goroutine,
// which is spawned when the broadcast handler's Handle() function is invoked.
func (ch *chainImpl) Enqueue(env *cb.Envelope) bool {
	if ch.halted {
		return false
	}

	logger.Debug("Enqueueing:", env)
	if err := ch.producer.Send(ch.partition, utils.MarshalOrPanic(newRegularMessage(utils.MarshalOrPanic(env)))); err != nil {
		logger.Errorf("Couldn't post to %s: %s", ch.partition, err)
		return false
	}

	return !ch.halted // If ch.halted has been set to true while sending, we should return false
}

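// loop is the chain's main processing goroutine: it consumes messages from the
// chain's partition, cuts blocks either when a batch fills up or when a
// time-to-cut message for the next block arrives, and records in each block's
// orderer metadata the offset of the Kafka message being processed at the time
// of the cut (LastOffsetPersisted), so the chain can resume from there after a
// restart.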
func (ch *chainImpl) loop() {
	msg := new(ab.KafkaMessage)
	var timer <-chan time.Time
	var ttcNumber uint64
	var encodedLastOffsetPersisted []byte

	defer close(ch.haltedChan)
	defer ch.producer.Close()
	defer func() { ch.halted = true }()
	defer ch.consumer.Close()

	for {
		select {
		case in := <-ch.consumer.Recv():
			if err := proto.Unmarshal(in.Value, msg); err != nil {
				// This shouldn't happen, it should be filtered at ingress
				logger.Critical("Unable to unmarshal consumed message:", err)
			}
			logger.Debug("Received:", msg)
			switch msg.Type.(type) {
			case *ab.KafkaMessage_Connect:
				logger.Debug("It's a connect message - ignoring")
				continue
			case *ab.KafkaMessage_TimeToCut:
				ttcNumber = msg.GetTimeToCut().BlockNumber
				logger.Debug("It's a time-to-cut message for block", ttcNumber)
				if ttcNumber == ch.lastCutBlock+1 {
					timer = nil
					logger.Debug("Nil'd the timer")
					batch, committers := ch.support.BlockCutter().Cut()
					if len(batch) == 0 {
						logger.Warningf("Got right time-to-cut message (%d) but no pending requests - this might indicate a bug", ch.lastCutBlock)
						logger.Infof("Consenter for chain %s exiting", ch.partition.Topic())
						return
					}
					block := ch.support.CreateNextBlock(batch)
					encodedLastOffsetPersisted = utils.MarshalOrPanic(&ab.KafkaMetadata{LastOffsetPersisted: in.Offset})
					ch.support.WriteBlock(block, committers, encodedLastOffsetPersisted)
					ch.lastCutBlock++
					logger.Debug("Proper time-to-cut received, just cut block", ch.lastCutBlock)
					continue
				} else if ttcNumber > ch.lastCutBlock+1 {
					logger.Warningf("Got larger time-to-cut message (%d) than allowed (%d) - this might indicate a bug", ttcNumber, ch.lastCutBlock+1)
					logger.Infof("Consenter for chain %s exiting", ch.partition.Topic())
					return
				}
				logger.Debug("Ignoring stale time-to-cut message for", ch.lastCutBlock)
			case *ab.KafkaMessage_Regular:
				env := new(cb.Envelope)
				if err := proto.Unmarshal(msg.GetRegular().Payload, env); err != nil {
					// This shouldn't happen, it should be filtered at ingress
					logger.Critical("Unable to unmarshal consumed regular message:", err)
					continue
				}
				batches, committers, ok := ch.support.BlockCutter().Ordered(env)
				logger.Debugf("Ordering results: batches: %v, ok: %v", batches, ok)
				if ok && len(batches) == 0 && timer == nil {
					timer = time.After(ch.batchTimeout)
					logger.Debugf("Just began %s batch timer", ch.batchTimeout.String())
					continue
				}
				// If !ok, batches == nil, so this will be skipped
				for i, batch := range batches {
					block := ch.support.CreateNextBlock(batch)
					encodedLastOffsetPersisted = utils.MarshalOrPanic(&ab.KafkaMetadata{LastOffsetPersisted: in.Offset})
					ch.support.WriteBlock(block, committers[i], encodedLastOffsetPersisted)
					ch.lastCutBlock++
					logger.Debug("Batch filled, just cut block", ch.lastCutBlock)
				}
				if len(batches) > 0 {
					timer = nil
				}
			}
		case <-timer:
			logger.Debugf("Time-to-cut block %d timer expired", ch.lastCutBlock+1)
			timer = nil
			if err := ch.producer.Send(ch.partition, utils.MarshalOrPanic(newTimeToCutMessage(ch.lastCutBlock+1))); err != nil {
				logger.Errorf("Couldn't post to %s: %s", ch.partition, err)
				// Do not exit
			}
		case <-ch.exitChan: // When Halt() is called
			logger.Infof("Consenter for chain %s exiting", ch.partition.Topic())
			return
		}
	}
}

// Closeable allows the shutdown of the calling resource.
type Closeable interface {
	Close() error
}