github.com/klaytn/klaytn@v1.10.2/datasync/chaindatafetcher/kafka/consumer.go

// Copyright 2020 The klaytn Authors
// This file is part of the klaytn library.
//
// The klaytn library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The klaytn library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the klaytn library. If not, see <http://www.gnu.org/licenses/>.

package kafka

import (
	"context"
	"encoding/binary"
	"errors"
	"fmt"
	"log"
	"os"
	"time"

	"github.com/Shopify/sarama"
)

// Logger is the sarama.StdLogger instance through which the chaindatafetcher emits SDK-level log messages.
// By default it prints all log messages to standard output, but it can be redirected wherever you want.
var Logger sarama.StdLogger = log.New(os.Stdout, "[Chaindatafetcher] ", log.LstdFlags)

//go:generate mockgen -destination=./mocks/consumer_group_session_mock.go -package=mocks github.com/klaytn/klaytn/datasync/chaindatafetcher/kafka ConsumerGroupSession
// ConsumerGroupSession mocks sarama.ConsumerGroupSession for better testing.
type ConsumerGroupSession interface {
	MarkOffset(topic string, partition int32, offset int64, metadata string)
	MarkMessage(msg *sarama.ConsumerMessage, metadata string)
}

var (
	eventNameErrorMsg          = "the event name must be either 'blockgroup' or 'tracegroup'"
	nilConsumerMessageErrorMsg = "the given message should not be nil"
	wrongHeaderNumberErrorMsg  = "the number of header is not expected"
	wrongHeaderKeyErrorMsg     = "the header key is not expected"
	wrongMsgVersionErrorMsg    = "the message version is not supported"
	missingSegmentErrorMsg     = "there is a missing segment"
	noHandlerErrorMsg          = "the handler does not exist for the given topic"
	emptySegmentErrorMsg       = "there is no segment in the segment slice"
	bufferOverflowErrorMsg     = "the number of items in buffer exceeded the maximum"
	msgExpiredErrorMsg         = "the message is expired"
)

// TopicHandler is a handler function for consuming published messages.
type TopicHandler func(message *sarama.ConsumerMessage) error

// Segment represents a message segment with the parsed headers.
type Segment struct {
	orig       *sarama.ConsumerMessage
	key        string
	total      uint64
	index      uint64
	value      []byte
	version    string
	producerId string
}

func (s *Segment) String() string {
	return fmt.Sprintf("key: %v, total: %v, index: %v, value: %v, version: %v, producerId: %v", s.key, s.total, s.index, string(s.value), s.version, s.producerId)
}
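// The following sketch (illustrative, not part of the original file) builds a
// v1.0 message with the header layout newSegment expects and parses it back.
// It assumes the MsgHeader* index constants order the headers as: total
// segments, segment index, version, producer id; key and payload are placeholders.
func exampleBuildAndParseSegment() (*Segment, error) {
	total := make([]byte, 8)
	index := make([]byte, 8)
	binary.BigEndian.PutUint64(total, 3) // the whole message is split into 3 segments
	binary.BigEndian.PutUint64(index, 0) // this message carries the first segment

	msg := &sarama.ConsumerMessage{
		Key:   []byte("blockgroup-42"), // placeholder key
		Value: []byte("first-chunk"),   // placeholder payload
		Headers: []*sarama.RecordHeader{
			{Key: []byte(KeyTotalSegments), Value: total},
			{Key: []byte(KeySegmentIdx), Value: index},
			{Key: []byte(KeyVersion), Value: []byte(MsgVersion1_0)},
			{Key: []byte(KeyProducerId), Value: []byte("producer-1")}, // placeholder producer id
		},
	}
	return newSegment(msg)
}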
// newSegment creates a new segment structure after parsing the headers.
func newSegment(msg *sarama.ConsumerMessage) (*Segment, error) {
	if msg == nil {
		return nil, errors.New(nilConsumerMessageErrorMsg)
	}

	headerLen := len(msg.Headers)
	if headerLen != MsgHeaderLength && headerLen != LegacyMsgHeaderLength {
		return nil, fmt.Errorf("%v [header length: %v]", wrongHeaderNumberErrorMsg, headerLen)
	}

	version := ""
	producerId := ""

	// legacy messages carry only the total-segments and segment-index headers,
	// while v1.0 messages additionally carry the version and producer id headers.
	if headerLen == MsgHeaderLength {
		keyVersion := string(msg.Headers[MsgHeaderVersion].Key)
		if keyVersion != KeyVersion {
			return nil, fmt.Errorf("%v [expected: %v, actual: %v]", wrongHeaderKeyErrorMsg, KeyVersion, keyVersion)
		}
		version = string(msg.Headers[MsgHeaderVersion].Value)
		switch version {
		case MsgVersion1_0:
			keyProducerId := string(msg.Headers[MsgHeaderProducerId].Key)
			if keyProducerId != KeyProducerId {
				return nil, fmt.Errorf("%v [expected: %v, actual: %v]", wrongHeaderKeyErrorMsg, KeyProducerId, keyProducerId)
			}
			producerId = string(msg.Headers[MsgHeaderProducerId].Value)
		default:
			return nil, fmt.Errorf("%v [available: %v]", wrongMsgVersionErrorMsg, MsgVersion1_0)
		}
	}

	// check the existence of the KeyTotalSegments header
	keyTotalSegments := string(msg.Headers[MsgHeaderTotalSegments].Key)
	if keyTotalSegments != KeyTotalSegments {
		return nil, fmt.Errorf("%v [expected: %v, actual: %v]", wrongHeaderKeyErrorMsg, KeyTotalSegments, keyTotalSegments)
	}

	// check the existence of the MsgHeaderSegmentIdx header
	keySegmentIdx := string(msg.Headers[MsgHeaderSegmentIdx].Key)
	if keySegmentIdx != KeySegmentIdx {
		return nil, fmt.Errorf("%v [expected: %v, actual: %v]", wrongHeaderKeyErrorMsg, KeySegmentIdx, keySegmentIdx)
	}

	key := string(msg.Key)
	// both header values are expected to be 8-byte big-endian integers.
	totalSegments := binary.BigEndian.Uint64(msg.Headers[MsgHeaderTotalSegments].Value)
	segmentIdx := binary.BigEndian.Uint64(msg.Headers[MsgHeaderSegmentIdx].Value)
	return &Segment{
		orig:       msg,
		key:        key,
		total:      totalSegments,
		index:      segmentIdx,
		value:      msg.Value,
		version:    version,
		producerId: producerId,
	}, nil
}

// Consumer is a reference structure to subscribe to the block or trace group produced by an EN.
type Consumer struct {
	config   *KafkaConfig
	group    sarama.ConsumerGroup
	topics   []string
	handlers map[string]TopicHandler
}

func NewConsumer(config *KafkaConfig, groupId string) (*Consumer, error) {
	group, err := sarama.NewConsumerGroup(config.Brokers, groupId, config.SaramaConfig)
	if err != nil {
		return nil, err
	}
	Logger.Printf("[INFO] the chaindatafetcher consumer is created. [groupId: %s, config: %s]", groupId, config.String())
	return &Consumer{
		config:   config,
		group:    group,
		handlers: make(map[string]TopicHandler),
	}, nil
}

// Close stops the ConsumerGroup and detaches any running sessions. It is required to call
// this function before the object passes out of scope, as it will otherwise leak memory.
func (c *Consumer) Close() error {
	Logger.Println("[INFO] the chaindatafetcher consumer is closed")
	return c.group.Close()
}
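// A minimal construction sketch (illustrative, not part of the original file),
// assuming GetDefaultKafkaConfig is the default-config helper defined elsewhere
// in this package; the broker address and group id are placeholders. With
// Consumer.Return.Errors enabled, consumption errors must be drained from Errors().
func exampleNewConsumerWithErrors() (*Consumer, error) {
	config := GetDefaultKafkaConfig() // assumed helper from this package's config
	config.Brokers = []string{"localhost:9092"}
	config.SaramaConfig.Consumer.Return.Errors = true
	consumer, err := NewConsumer(config, "example-group")
	if err != nil {
		return nil, err
	}
	go func() {
		// drain the error channel so the consumer group does not block
		for err := range consumer.Errors() {
			Logger.Printf("[ERROR] consumer error [err: %s]", err.Error())
		}
	}()
	return consumer, nil
}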
// AddTopicAndHandler adds a topic associated with the given event along with a handler function to consume published messages of the topic.
func (c *Consumer) AddTopicAndHandler(event string, handler TopicHandler) error {
	if event != EventBlockGroup && event != EventTraceGroup {
		return fmt.Errorf("%v [given: %v]", eventNameErrorMsg, event)
	}
	topic := c.config.GetTopicName(event)
	c.topics = append(c.topics, topic)
	c.handlers[topic] = handler
	return nil
}

func (c *Consumer) Errors() <-chan error {
	// If c.config.SaramaConfig.Consumer.Return.Errors is set to true, then
	// errors that occur while consuming messages can be read from the c.group.Errors() channel.
	// Otherwise, errors are only logged via sarama.Logger by default.
	return c.group.Errors()
}

// Subscribe consumes the registered topics with the registered handlers until the consumer is closed.
func (c *Consumer) Subscribe(ctx context.Context) error {
	if len(c.handlers) == 0 || len(c.topics) == 0 {
		return errors.New("there is no registered handler")
	}

	// Iterate over consumer sessions.
	for {
		Logger.Println("[INFO] started to consume Kafka message")
		if err := c.group.Consume(ctx, c.topics, c); err == sarama.ErrClosedConsumerGroup {
			Logger.Println("[INFO] the consumer group is closed")
			return nil
		} else if err != nil {
			Logger.Printf("[ERROR] the consumption is failed [err: %s]\n", err.Error())
			return err
		}
		// TODO-Chaindatafetcher add retry logic and error callback here
	}
}

// The following three methods implement sarama.ConsumerGroupHandler; they are called in the order Setup, ConsumeClaim, and Cleanup.
// Within Subscribe, the Consume method triggers these functions to handle published messages.

// Setup is called at the beginning of a new session, before ConsumeClaim.
func (c *Consumer) Setup(s sarama.ConsumerGroupSession) error {
	return c.config.Setup(s)
}

// Cleanup is called at the end of a session, once all ConsumeClaim goroutines have exited
// but before the offsets are committed for the very last time.
func (c *Consumer) Cleanup(s sarama.ConsumerGroupSession) error {
	return c.config.Cleanup(s)
}
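// An illustrative sketch of the subscription flow (not part of the original
// file): register a handler for the block group topic and consume until the
// context is cancelled. The handler body is a placeholder.
func exampleSubscribe(ctx context.Context, c *Consumer) error {
	handler := func(msg *sarama.ConsumerMessage) error {
		Logger.Printf("[INFO] consumed message [key: %s, size: %d]", string(msg.Key), len(msg.Value))
		return nil
	}
	if err := c.AddTopicAndHandler(EventBlockGroup, handler); err != nil {
		return err
	}
	// Subscribe blocks until the consumer is closed or the context is cancelled.
	return c.Subscribe(ctx)
}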
// insertSegment inserts the given segment into the given buffer.
// Assumptions:
//  1. it is guaranteed that the segments arrive in the correct order.
//  2. the inserted messages may be duplicated.
//
// The following cases are considered.
//  case 1: a new segment with index 0 is inserted into a newly created segment slice.
//  case 2: a new consecutive segment is inserted at the right position.
//  case 3: a duplicated segment is ignored.
//  case 4: a new sparse segment shouldn't be given, so an error is returned.
func insertSegment(newSegment *Segment, buffer [][]*Segment) ([][]*Segment, error) {
	for idx, bufferedSegments := range buffer {
		numBuffered := len(bufferedSegments)
		if numBuffered > 0 && bufferedSegments[0].key == newSegment.key && bufferedSegments[0].producerId == newSegment.producerId {
			// there is a missing segment, which should not happen.
			if newSegment.index > uint64(numBuffered) {
				Logger.Printf("[ERROR] there may be a missing segment [numBuffered: %d, newSegment: %s]\n", numBuffered, newSegment.String())
				return buffer, errors.New(missingSegmentErrorMsg)
			}

			// the segment has already been inserted into the buffer.
			if newSegment.index < uint64(numBuffered) {
				Logger.Printf("[WARN] the message is duplicated [newSegment: %s]\n", newSegment.String())
				return buffer, nil
			}

			// insert the segment into the buffer.
			buffer[idx] = append(bufferedSegments, newSegment)
			return buffer, nil
		}
	}

	if newSegment.index == 0 {
		// create a new segment slice and append it.
		buffer = append(buffer, []*Segment{newSegment})
	} else {
		// the segment may have been handled already.
		Logger.Printf("[WARN] the message may be inserted already. drop the segment [segment: %s]\n", newSegment.String())
	}
	return buffer, nil
}

// handleBufferedMessages handles all consecutive complete messages in the buffer.
func (c *Consumer) handleBufferedMessages(buffer [][]*Segment) ([][]*Segment, error) {
	for len(buffer) > 0 {
		// if any message exists in the buffer
		oldestMsg, firstSegment, buffered := buffer[0], buffer[0][0], len(buffer[0])
		if uint64(buffered) != firstSegment.total {
			// not ready to assemble the message yet
			return buffer, nil
		}

		// ready to assemble the message
		var msgBuffer []byte
		for _, segment := range oldestMsg {
			msgBuffer = append(msgBuffer, segment.value...)
		}
		msg := &sarama.ConsumerMessage{
			Key:   []byte(firstSegment.key),
			Value: msgBuffer,
			// carry over the topic of the original message so that the
			// handler lookup and the error messages below refer to it.
			Topic: firstSegment.orig.Topic,
		}

		f, ok := c.handlers[msg.Topic]
		if !ok {
			Logger.Printf("[ERROR] getting handler is failed with the given topic. [topic: %s]\n", msg.Topic)
			return buffer, fmt.Errorf("%v: %v", noHandlerErrorMsg, msg.Topic)
		}

		if err := f(msg); err != nil {
			Logger.Printf("[ERROR] the handler is failed [key: %s]\n", string(msg.Key))
			return buffer, err
		}

		buffer = buffer[1:]
	}

	return buffer, nil
}

// updateOffset updates the offset after handling messages.
// The offset should be marked for the oldest message (which has not been fully read) in the given buffer,
// so that it is consumed again after a restart.
// If there is no segment in the buffer, the offset of the last consumed message should be marked.
func (c *Consumer) updateOffset(buffer [][]*Segment, lastMsg *sarama.ConsumerMessage, session ConsumerGroupSession) error {
	if len(buffer) > 0 {
		if len(buffer[0]) <= 0 {
			Logger.Println("[ERROR] no segment exists in the given buffer slice")
			return errors.New(emptySegmentErrorMsg)
		}

		oldestMsg := buffer[0][0].orig
		// mark the offset of the oldest message, which has not been fully processed yet
		session.MarkOffset(oldestMsg.Topic, oldestMsg.Partition, oldestMsg.Offset, "")
	} else {
		// mark the last message as read
		session.MarkMessage(lastMsg, "")
	}

	return nil
}

// resetTimer resets the given timer if the oldest message has changed, and returns the current oldest message if one exists.
func (c *Consumer) resetTimer(buffer [][]*Segment, timer *time.Timer, oldestMsg *sarama.ConsumerMessage) *sarama.ConsumerMessage {
	if c.config.ExpirationTime <= time.Duration(0) {
		return nil
	}

	if len(buffer) == 0 {
		timer.Stop()
		return nil
	}

	if oldestMsg != buffer[0][0].orig {
		timer.Reset(c.config.ExpirationTime)
	}

	return buffer[0][0].orig
}
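// An illustrative walk-through (not part of the original file) of the buffer
// behavior above: both segments of a two-segment message are inserted in order.
// Keys and values are placeholders.
func exampleBufferLifecycle() {
	var buffer [][]*Segment
	seg0 := &Segment{key: "msg-1", total: 2, index: 0, value: []byte("hello ")}
	seg1 := &Segment{key: "msg-1", total: 2, index: 1, value: []byte("world")}

	buffer, _ = insertSegment(seg0, buffer) // case 1: a new segment slice is created
	buffer, _ = insertSegment(seg1, buffer) // case 2: appended to the existing slice
	// now len(buffer[0]) == 2 == seg0.total, so handleBufferedMessages would
	// concatenate the values into "hello world" and dispatch it to the handler.
	_ = buffer
}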
// ConsumeClaim must start a consumer loop over the ConsumerGroupClaim's Messages().
// Once the Messages() channel is closed, the Handler must finish its processing
// loop and exit.
func (c *Consumer) ConsumeClaim(cgs sarama.ConsumerGroupSession, cgc sarama.ConsumerGroupClaim) error {
	if buffer, err := c.consumeClaim(cgs, cgc); err != nil {
		return c.handleError(buffer, cgs, err)
	}
	return nil
}

func (c *Consumer) consumeClaim(cgs sarama.ConsumerGroupSession, cgc sarama.ConsumerGroupClaim) ([][]*Segment, error) {
	var (
		buffer          [][]*Segment // TODO-Chaindatafetcher better to introduce a segment buffer structure with useful methods
		oldestMsg       *sarama.ConsumerMessage
		expirationTimer = time.NewTimer(c.config.ExpirationTime)
	)

	// make sure that the expirationTimer channel is empty
	if !expirationTimer.Stop() {
		<-expirationTimer.C
	}

	for {
		select {
		case <-expirationTimer.C:
			return buffer, errors.New(msgExpiredErrorMsg)
		case msg, ok := <-cgc.Messages():
			if !ok {
				return buffer, nil
			}

			if len(buffer) > c.config.MaxMessageNumber {
				return buffer, fmt.Errorf("%v: increasing buffer size may resolve this problem. [max: %v, current: %v]", bufferOverflowErrorMsg, c.config.MaxMessageNumber, len(buffer))
			}

			segment, err := newSegment(msg)
			if err != nil {
				return buffer, err
			}

			// insert the new message segment into the buffer
			buffer, err = insertSegment(segment, buffer)
			if err != nil {
				return buffer, err
			}

			// handle the buffered messages if any message can be reassembled
			buffer, err = c.handleBufferedMessages(buffer)
			if err != nil {
				return buffer, err
			}

			// reset the expiration timer if necessary and update the oldest message
			oldestMsg = c.resetTimer(buffer, expirationTimer, oldestMsg)

			// mark the offset of the oldest message to be read
			if err := c.updateOffset(buffer, msg, cgs); err != nil {
				return buffer, err
			}
		}
	}
}

func (c *Consumer) handleError(buffer [][]*Segment, cgs ConsumerGroupSession, parentErr error) error {
	// without an error callback, or with nothing buffered, the error is simply propagated.
	if len(buffer) <= 0 || c.config.ErrCallback == nil {
		return parentErr
	}

	oldestMsg := buffer[0][0].orig
	key := string(oldestMsg.Key)

	// give the configured error callback a chance to recover from the failure;
	// if it succeeds, drop the failed message and advance the offset past it.
	if err := c.config.ErrCallback(key); err != nil {
		return err
	}

	buffer = buffer[1:]
	return c.updateOffset(buffer, oldestMsg, cgs)
}
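// Putting it together: an end-to-end sketch (illustrative, not part of the
// original file) that creates a consumer, registers a trace group handler,
// subscribes, and closes the consumer on return. It assumes the
// GetDefaultKafkaConfig helper; the broker address and group id are placeholders.
func exampleRunConsumer(ctx context.Context) error {
	config := GetDefaultKafkaConfig() // assumed helper from this package's config
	config.Brokers = []string{"localhost:9092"}
	consumer, err := NewConsumer(config, "example-group")
	if err != nil {
		return err
	}
	defer consumer.Close()

	err = consumer.AddTopicAndHandler(EventTraceGroup, func(msg *sarama.ConsumerMessage) error {
		Logger.Printf("[INFO] trace group message [key: %s]", string(msg.Key))
		return nil
	})
	if err != nil {
		return err
	}
	return consumer.Subscribe(ctx)
}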