github.com/darrenli6/fabric-sdk-example@v0.0.0-20220109053535-94b13b56df8c/orderer/kafka/chain.go

/*
Copyright IBM Corp. All Rights Reserved.

SPDX-License-Identifier: Apache-2.0
*/

package kafka

import (
	"fmt"
	"strconv"
	"time"

	"github.com/Shopify/sarama"
	"github.com/golang/protobuf/proto"
	localconfig "github.com/hyperledger/fabric/orderer/localconfig"
	"github.com/hyperledger/fabric/orderer/multichain"
	cb "github.com/hyperledger/fabric/protos/common"
	ab "github.com/hyperledger/fabric/protos/orderer"
	"github.com/hyperledger/fabric/protos/utils"
)

// Used for capturing metrics -- see processMessagesToBlocks
const (
	indexRecvError = iota
	indexUnmarshalError
	indexRecvPass
	indexProcessConnectPass
	indexProcessTimeToCutError
	indexProcessTimeToCutPass
	indexProcessRegularError
	indexProcessRegularPass
	indexSendTimeToCutError
	indexSendTimeToCutPass
	indexExitChanPass
)

func newChain(consenter commonConsenter, support multichain.ConsenterSupport, lastOffsetPersisted int64) (*chainImpl, error) {
	lastCutBlockNumber := getLastCutBlockNumber(support.Height())
	logger.Infof("[channel: %s] Starting chain with last persisted offset %d and last recorded block %d",
		support.ChainID(), lastOffsetPersisted, lastCutBlockNumber)

	errorChan := make(chan struct{})
	close(errorChan) // We need this closed when starting up

	return &chainImpl{
		consenter:           consenter,
		support:             support,
		channel:             newChannel(support.ChainID(), defaultPartition),
		lastOffsetPersisted: lastOffsetPersisted,
		lastCutBlockNumber:  lastCutBlockNumber,

		errorChan: errorChan,
		haltChan:  make(chan struct{}),
		startChan: make(chan struct{}),
	}, nil
}

type chainImpl struct {
	consenter commonConsenter
	support   multichain.ConsenterSupport

	channel             channel
	lastOffsetPersisted int64
	lastCutBlockNumber  uint64

	producer        sarama.SyncProducer
	parentConsumer  sarama.Consumer
	channelConsumer sarama.PartitionConsumer

	// When the partition consumer errors, close the channel. Otherwise, make
	// this an open, unbuffered channel.
	errorChan chan struct{}
	// When a Halt() request comes, close the channel. Unlike errorChan, this
	// channel never re-opens when closed. Its closing triggers the exit of the
	// processMessagesToBlocks loop.
	haltChan chan struct{}
	// Close when the retriable steps in Start have completed.
	startChan chan struct{}
}

// Errored returns a channel which will close when a partition consumer error
// has occurred. Checked by Deliver().
func (chain *chainImpl) Errored() <-chan struct{} {
	return chain.errorChan
}

// Start allocates the necessary resources for staying up to date with this
// Chain. Implements the multichain.Chain interface. Called by
// multichain.NewManagerImpl() which is invoked when the ordering process is
// launched, before the call to NewServer(). Launches a goroutine so as not to
// block the multichain.Manager.
func (chain *chainImpl) Start() {
	go startThread(chain)
}

// Halt frees the resources which were allocated for this Chain. Implements the
// multichain.Chain interface.
func (chain *chainImpl) Halt() {
	select {
	case <-chain.haltChan:
		// This construct is useful because it allows Halt() to be called
		// multiple times (by a single thread) w/o panicking. Recall that a
		// receive from a closed channel returns (the zero value) immediately.
		logger.Warningf("[channel: %s] Halting of chain requested again", chain.support.ChainID())
	default:
		logger.Criticalf("[channel: %s] Halting of chain requested", chain.support.ChainID())
		close(chain.haltChan)
		chain.closeKafkaObjects() // Also close the producer and the consumer
		logger.Debugf("[channel: %s] Closed the haltChan", chain.support.ChainID())
	}
}

// Enqueue accepts a message and returns true on acceptance, or false otherwise.
// Implements the multichain.Chain interface. Called by Broadcast().
func (chain *chainImpl) Enqueue(env *cb.Envelope) bool {
	logger.Debugf("[channel: %s] Enqueueing envelope...", chain.support.ChainID())
	select {
	case <-chain.startChan: // The Start phase has completed
		select {
		case <-chain.haltChan: // The chain has been halted, stop here
			logger.Warningf("[channel: %s] Will not enqueue, consenter for this channel has been halted", chain.support.ChainID())
			return false
		default: // The post path
			marshaledEnv, err := utils.Marshal(env)
			if err != nil {
				logger.Errorf("[channel: %s] cannot enqueue, unable to marshal envelope = %s", chain.support.ChainID(), err)
				return false
			}
			// We're good to go
			payload := utils.MarshalOrPanic(newRegularMessage(marshaledEnv))
			message := newProducerMessage(chain.channel, payload)
			if _, _, err := chain.producer.SendMessage(message); err != nil {
				logger.Errorf("[channel: %s] cannot enqueue envelope = %s", chain.support.ChainID(), err)
				return false
			}
			logger.Debugf("[channel: %s] Envelope enqueued successfully", chain.support.ChainID())
			return true
		}
	default: // Not ready yet
		logger.Warningf("[channel: %s] Will not enqueue, consenter for this channel hasn't started yet", chain.support.ChainID())
		return false
	}
}

// Called by Start().
func startThread(chain *chainImpl) {
	var err error

	// Set up the producer
	chain.producer, err = setupProducerForChannel(chain.consenter.retryOptions(), chain.haltChan, chain.support.SharedConfig().KafkaBrokers(), chain.consenter.brokerConfig(), chain.channel)
	if err != nil {
		logger.Panicf("[channel: %s] Cannot set up producer = %s", chain.channel.topic(), err)
	}
	logger.Infof("[channel: %s] Producer set up successfully", chain.support.ChainID())

	// Have the producer post the CONNECT message
	if err = sendConnectMessage(chain.consenter.retryOptions(), chain.haltChan, chain.producer, chain.channel); err != nil {
		logger.Panicf("[channel: %s] Cannot post CONNECT message = %s", chain.channel.topic(), err)
	}
	logger.Infof("[channel: %s] CONNECT message posted successfully", chain.channel.topic())

	// Set up the parent consumer
	chain.parentConsumer, err = setupParentConsumerForChannel(chain.consenter.retryOptions(), chain.haltChan, chain.support.SharedConfig().KafkaBrokers(), chain.consenter.brokerConfig(), chain.channel)
	if err != nil {
		logger.Panicf("[channel: %s] Cannot set up parent consumer = %s", chain.channel.topic(), err)
	}
	logger.Infof("[channel: %s] Parent consumer set up successfully", chain.channel.topic())

	// Set up the channel consumer
	chain.channelConsumer, err = setupChannelConsumerForChannel(chain.consenter.retryOptions(), chain.haltChan, chain.parentConsumer, chain.channel, chain.lastOffsetPersisted+1)
	if err != nil {
		logger.Panicf("[channel: %s] Cannot set up channel consumer = %s", chain.channel.topic(), err)
	}
	logger.Infof("[channel: %s] Channel consumer set up successfully", chain.channel.topic())

	close(chain.startChan)                // Broadcast requests will now go through
	chain.errorChan = make(chan struct{}) // Deliver requests will also go through

	logger.Infof("[channel: %s] Start phase completed successfully", chain.channel.topic())

	chain.processMessagesToBlocks() // Keep up to date with the channel
}

// processMessagesToBlocks drains the Kafka consumer for the given channel, and
// takes care of converting the stream of ordered messages into blocks for the
// channel's ledger.
func (chain *chainImpl) processMessagesToBlocks() ([]uint64, error) {
	counts := make([]uint64, 11) // For metrics and tests
	msg := new(ab.KafkaMessage)
	var timer <-chan time.Time

	defer func() { // When Halt() is called
		select {
		case <-chain.errorChan: // If already closed, don't do anything
		default:
			close(chain.errorChan)
		}
	}()

	for {
		select {
		case <-chain.haltChan:
			logger.Warningf("[channel: %s] Consenter for channel exiting", chain.support.ChainID())
			counts[indexExitChanPass]++
			return counts, nil
		case kafkaErr := <-chain.channelConsumer.Errors():
			logger.Errorf("[channel: %s] Error during consumption: %s", chain.support.ChainID(), kafkaErr)
			counts[indexRecvError]++
			select {
			case <-chain.errorChan: // If already closed, don't do anything
			default:
				close(chain.errorChan)
			}
			logger.Warningf("[channel: %s] Closed the errorChan", chain.support.ChainID())
			// This covers the edge case where (1) a consumption error has
			// closed the errorChan and thus rendered the chain unavailable to
			// deliver clients, (2) we're already at the newest offset, and (3)
			// there are no new Broadcast requests coming in. In this case,
			// there is no trigger that can recreate the errorChan again and
			// mark the chain as available, so we have to force that trigger via
			// the emission of a CONNECT message. TODO Consider rate limiting
			go sendConnectMessage(chain.consenter.retryOptions(), chain.haltChan, chain.producer, chain.channel)
		case in, ok := <-chain.channelConsumer.Messages():
			if !ok {
				logger.Criticalf("[channel: %s] Kafka consumer closed.", chain.support.ChainID())
				return counts, nil
			}
			select {
			case <-chain.errorChan: // If this channel was closed...
				chain.errorChan = make(chan struct{}) // ...make a new one.
				logger.Infof("[channel: %s] Marked consenter as available again", chain.support.ChainID())
			default:
			}
			if err := proto.Unmarshal(in.Value, msg); err != nil {
				// This shouldn't happen, it should be filtered at ingress
				logger.Criticalf("[channel: %s] Unable to unmarshal consumed message = %s", chain.support.ChainID(), err)
				counts[indexUnmarshalError]++
				continue
			} else {
				logger.Debugf("[channel: %s] Successfully unmarshalled consumed message, offset is %d. Inspecting type...", chain.support.ChainID(), in.Offset)
				counts[indexRecvPass]++
			}
			switch msg.Type.(type) {
			case *ab.KafkaMessage_Connect:
				_ = processConnect(chain.support.ChainID())
				counts[indexProcessConnectPass]++
			case *ab.KafkaMessage_TimeToCut:
				if err := processTimeToCut(msg.GetTimeToCut(), chain.support, &chain.lastCutBlockNumber, &timer, in.Offset); err != nil {
					logger.Warningf("[channel: %s] %s", chain.support.ChainID(), err)
					logger.Criticalf("[channel: %s] Consenter for channel exiting", chain.support.ChainID())
					counts[indexProcessTimeToCutError]++
					return counts, err // TODO Revisit whether we should indeed stop processing the chain at this point
				}
				counts[indexProcessTimeToCutPass]++
			case *ab.KafkaMessage_Regular:
				if err := processRegular(msg.GetRegular(), chain.support, &timer, in.Offset, &chain.lastCutBlockNumber); err != nil {
					logger.Warningf("[channel: %s] Error when processing incoming message of type REGULAR = %s", chain.support.ChainID(), err)
					counts[indexProcessRegularError]++
				} else {
					counts[indexProcessRegularPass]++
				}
			}
		case <-timer:
			if err := sendTimeToCut(chain.producer, chain.channel, chain.lastCutBlockNumber+1, &timer); err != nil {
				logger.Errorf("[channel: %s] cannot post time-to-cut message = %s", chain.support.ChainID(), err)
				// Do not return though
				counts[indexSendTimeToCutError]++
			} else {
				counts[indexSendTimeToCutPass]++
			}
		}
	}
}

func (chain *chainImpl) closeKafkaObjects() []error {
	var errs []error

	err := chain.channelConsumer.Close()
	if err != nil {
		logger.Errorf("[channel: %s] could not close channelConsumer cleanly = %s", chain.support.ChainID(), err)
		errs = append(errs, err)
	} else {
		logger.Debugf("[channel: %s] Closed the channel consumer", chain.support.ChainID())
	}

	err = chain.parentConsumer.Close()
	if err != nil {
		logger.Errorf("[channel: %s] could not close parentConsumer cleanly = %s", chain.support.ChainID(), err)
		errs = append(errs, err)
	} else {
		logger.Debugf("[channel: %s] Closed the parent consumer", chain.support.ChainID())
	}

	err = chain.producer.Close()
	if err != nil {
		logger.Errorf("[channel: %s] could not close producer cleanly = %s", chain.support.ChainID(), err)
		errs = append(errs, err)
	} else {
		logger.Debugf("[channel: %s] Closed the producer", chain.support.ChainID())
	}

	return errs
}

// Helper functions

func getLastCutBlockNumber(blockchainHeight uint64) uint64 {
	return blockchainHeight - 1
}

func getLastOffsetPersisted(metadataValue []byte, chainID string) int64 {
	if metadataValue != nil {
		// Extract orderer-related metadata from the tip of the ledger first
		kafkaMetadata := &ab.KafkaMetadata{}
		if err := proto.Unmarshal(metadataValue, kafkaMetadata); err != nil {
			logger.Panicf("[channel: %s] Ledger may be corrupted: "+
				"cannot unmarshal orderer metadata in most recent block", chainID)
		}
		return kafkaMetadata.LastOffsetPersisted
	}
	return (sarama.OffsetOldest - 1) // default
}

func newConnectMessage() *ab.KafkaMessage {
	return &ab.KafkaMessage{
		Type: &ab.KafkaMessage_Connect{
			Connect: &ab.KafkaMessageConnect{
				Payload: nil,
			},
		},
	}
}

func newRegularMessage(payload []byte) *ab.KafkaMessage {
	return &ab.KafkaMessage{
		Type: &ab.KafkaMessage_Regular{
			Regular: &ab.KafkaMessageRegular{
				Payload: payload,
			},
		},
	}
}

func newTimeToCutMessage(blockNumber uint64) *ab.KafkaMessage {
	return &ab.KafkaMessage{
		Type: &ab.KafkaMessage_TimeToCut{
			TimeToCut: &ab.KafkaMessageTimeToCut{
				BlockNumber: blockNumber,
			},
		},
	}
}

func newProducerMessage(channel channel, pld []byte) *sarama.ProducerMessage {
	return &sarama.ProducerMessage{
		Topic: channel.topic(),
		Key:   sarama.StringEncoder(strconv.Itoa(int(channel.partition()))), // TODO Consider writing an IntEncoder?
		Value: sarama.ByteEncoder(pld),
	}
}

func processConnect(channelName string) error {
	logger.Debugf("[channel: %s] It's a connect message - ignoring", channelName)
	return nil
}

func processRegular(regularMessage *ab.KafkaMessageRegular, support multichain.ConsenterSupport, timer *<-chan time.Time, receivedOffset int64, lastCutBlockNumber *uint64) error {
	env := new(cb.Envelope)
	if err := proto.Unmarshal(regularMessage.Payload, env); err != nil {
		// This shouldn't happen, it should be filtered at ingress
		return fmt.Errorf("unmarshal/%s", err)
	}
	batches, committers, ok, pending := support.BlockCutter().Ordered(env)
	logger.Debugf("[channel: %s] Ordering results: items in batch = %d, ok = %v, pending = %v", support.ChainID(), len(batches), ok, pending)
	if ok && len(batches) == 0 && *timer == nil {
		*timer = time.After(support.SharedConfig().BatchTimeout())
		logger.Debugf("[channel: %s] Just began %s batch timer", support.ChainID(), support.SharedConfig().BatchTimeout().String())
		return nil
	}

	offset := receivedOffset
	if pending || len(batches) == 2 {
		// If the newest envelope is not encapsulated into the first batch,
		// the LastOffsetPersisted of the first block should be receivedOffset-1.
		offset--
	}

	// If !ok, batches == nil, so this will be skipped
	for i, batch := range batches {
		block := support.CreateNextBlock(batch)
		encodedLastOffsetPersisted := utils.MarshalOrPanic(&ab.KafkaMetadata{LastOffsetPersisted: offset})
		support.WriteBlock(block, committers[i], encodedLastOffsetPersisted)
		*lastCutBlockNumber++
		logger.Debugf("[channel: %s] Batch filled, just cut block %d - last persisted offset is now %d", support.ChainID(), *lastCutBlockNumber, offset)
		offset++
	}

	if len(batches) > 0 {
		*timer = nil
	}
	return nil
}

func processTimeToCut(ttcMessage *ab.KafkaMessageTimeToCut, support multichain.ConsenterSupport, lastCutBlockNumber *uint64, timer *<-chan time.Time, receivedOffset int64) error {
	ttcNumber := ttcMessage.GetBlockNumber()
	logger.Debugf("[channel: %s] It's a time-to-cut message for block %d", support.ChainID(), ttcNumber)
	if ttcNumber == *lastCutBlockNumber+1 {
		*timer = nil
		logger.Debugf("[channel: %s] Nil'd the timer", support.ChainID())
		batch, committers := support.BlockCutter().Cut()
		if len(batch) == 0 {
			return fmt.Errorf("got right time-to-cut message (for block %d),"+
				" no pending requests though; this might indicate a bug", *lastCutBlockNumber+1)
		}
		block := support.CreateNextBlock(batch)
		encodedLastOffsetPersisted := utils.MarshalOrPanic(&ab.KafkaMetadata{LastOffsetPersisted: receivedOffset})
		support.WriteBlock(block, committers, encodedLastOffsetPersisted)
		*lastCutBlockNumber++
		logger.Debugf("[channel: %s] Proper time-to-cut received, just cut block %d", support.ChainID(), *lastCutBlockNumber)
		return nil
	} else if ttcNumber > *lastCutBlockNumber+1 {
		return fmt.Errorf("got larger time-to-cut message (%d) than allowed/expected (%d)"+
			" - this might indicate a bug", ttcNumber, *lastCutBlockNumber+1)
	}
	logger.Debugf("[channel: %s] Ignoring stale time-to-cut message for block %d", support.ChainID(), ttcNumber)
	return nil
}

// Post a CONNECT message to the channel using the given retry options. This
// prevents the panicking that would occur if we were to set up a consumer and
// seek on a partition that hadn't been written to yet.
func sendConnectMessage(retryOptions localconfig.Retry, exitChan chan struct{}, producer sarama.SyncProducer, channel channel) error {
	logger.Infof("[channel: %s] About to post the CONNECT message...", channel.topic())

	payload := utils.MarshalOrPanic(newConnectMessage())
	message := newProducerMessage(channel, payload)

	retryMsg := "Attempting to post the CONNECT message..."
	postConnect := newRetryProcess(retryOptions, exitChan, channel, retryMsg, func() error {
		_, _, err := producer.SendMessage(message)
		return err
	})

	return postConnect.retry()
}

func sendTimeToCut(producer sarama.SyncProducer, channel channel, timeToCutBlockNumber uint64, timer *<-chan time.Time) error {
	logger.Debugf("[channel: %s] Time-to-cut block %d timer expired", channel.topic(), timeToCutBlockNumber)
	*timer = nil
	payload := utils.MarshalOrPanic(newTimeToCutMessage(timeToCutBlockNumber))
	message := newProducerMessage(channel, payload)
	_, _, err := producer.SendMessage(message)
	return err
}

// Sets up the partition consumer for a channel using the given retry options.
func setupChannelConsumerForChannel(retryOptions localconfig.Retry, haltChan chan struct{}, parentConsumer sarama.Consumer, channel channel, startFrom int64) (sarama.PartitionConsumer, error) {
	var err error
	var channelConsumer sarama.PartitionConsumer

	logger.Infof("[channel: %s] Setting up the channel consumer for this channel (start offset: %d)...", channel.topic(), startFrom)

	retryMsg := "Connecting to the Kafka cluster"
	setupChannelConsumer := newRetryProcess(retryOptions, haltChan, channel, retryMsg, func() error {
		channelConsumer, err = parentConsumer.ConsumePartition(channel.topic(), channel.partition(), startFrom)
		return err
	})

	return channelConsumer, setupChannelConsumer.retry()
}

// Sets up the parent consumer for a channel using the given retry options.
func setupParentConsumerForChannel(retryOptions localconfig.Retry, haltChan chan struct{}, brokers []string, brokerConfig *sarama.Config, channel channel) (sarama.Consumer, error) {
	var err error
	var parentConsumer sarama.Consumer

	logger.Infof("[channel: %s] Setting up the parent consumer for this channel...", channel.topic())

	retryMsg := "Connecting to the Kafka cluster"
	setupParentConsumer := newRetryProcess(retryOptions, haltChan, channel, retryMsg, func() error {
		parentConsumer, err = sarama.NewConsumer(brokers, brokerConfig)
		return err
	})

	return parentConsumer, setupParentConsumer.retry()
}

// Sets up the writer/producer for a channel using the given retry options.
func setupProducerForChannel(retryOptions localconfig.Retry, haltChan chan struct{}, brokers []string, brokerConfig *sarama.Config, channel channel) (sarama.SyncProducer, error) {
	var err error
	var producer sarama.SyncProducer

	logger.Infof("[channel: %s] Setting up the producer for this channel...", channel.topic())

	retryMsg := "Connecting to the Kafka cluster"
	setupProducer := newRetryProcess(retryOptions, haltChan, channel, retryMsg, func() error {
		producer, err = sarama.NewSyncProducer(brokers, brokerConfig)
		return err
	})

	return producer, setupProducer.retry()
}
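
// The sketch below is an editorial illustration and is not part of the
// original chain.go; the function name is hypothetical. It isolates the
// close-once idiom that Halt() and processMessagesToBlocks() above rely on
// for haltChan and errorChan: guarding close() with a default-guarded select
// keeps a repeated close attempt from panicking, because a receive from an
// already-closed channel returns the zero value immediately. As the comment
// in Halt() notes, this is only safe when all calls come from a single
// goroutine.
func closeOnceSketch(ch chan struct{}) {
	select {
	case <-ch:
		// Already closed; a second close(ch) would panic, so do nothing.
	default:
		close(ch)
	}
}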