github.com/ydb-platform/ydb-go-sdk/v3@v3.57.0/internal/topic/topicwriterinternal/writer_reconnector.go (about) 1 package topicwriterinternal 2 3 import ( 4 "context" 5 "crypto/rand" 6 "errors" 7 "fmt" 8 "math" 9 "math/big" 10 "runtime" 11 "sync/atomic" 12 "time" 13 14 "github.com/google/uuid" 15 "github.com/jonboulle/clockwork" 16 "golang.org/x/sync/semaphore" 17 18 "github.com/ydb-platform/ydb-go-sdk/v3/credentials" 19 "github.com/ydb-platform/ydb-go-sdk/v3/internal/background" 20 "github.com/ydb-platform/ydb-go-sdk/v3/internal/config" 21 "github.com/ydb-platform/ydb-go-sdk/v3/internal/empty" 22 "github.com/ydb-platform/ydb-go-sdk/v3/internal/grpcwrapper/rawtopic/rawtopiccommon" 23 "github.com/ydb-platform/ydb-go-sdk/v3/internal/grpcwrapper/rawtopic/rawtopicwriter" 24 "github.com/ydb-platform/ydb-go-sdk/v3/internal/topic" 25 "github.com/ydb-platform/ydb-go-sdk/v3/internal/value" 26 "github.com/ydb-platform/ydb-go-sdk/v3/internal/xcontext" 27 "github.com/ydb-platform/ydb-go-sdk/v3/internal/xerrors" 28 "github.com/ydb-platform/ydb-go-sdk/v3/internal/xsync" 29 "github.com/ydb-platform/ydb-go-sdk/v3/topic/topictypes" 30 "github.com/ydb-platform/ydb-go-sdk/v3/trace" 31 ) 32 33 var ( 34 errConnTimeout = xerrors.Wrap(errors.New("ydb: connection timeout")) 35 errStopWriterReconnector = xerrors.Wrap(errors.New("ydb: stop writer reconnector")) 36 errNonZeroSeqNo = xerrors.Wrap(errors.New("ydb: non zero seqno for auto set seqno mode")) 37 errNonZeroCreatedAt = xerrors.Wrap(errors.New("ydb: non zero Message.CreatedAt and set auto fill created at option")) //nolint:lll 38 errNoAllowedCodecs = xerrors.Wrap(errors.New("ydb: no allowed codecs for write to topic")) 39 errLargeMessage = xerrors.Wrap(errors.New("ydb: message uncompressed size more, then limit")) 40 PublicErrQueueIsFull = xerrors.Wrap(errors.New("ydb: queue is full")) 41 42 // errProducerIDNotEqualMessageGroupID is temporary 43 // WithMessageGroupID is optional parameter because it allowed to be skipped by protocol. 44 // But right not YDB server doesn't implement it. 45 // It is fast check for return error at writer create context instead of stream initialization 46 // The error will remove in the future, when skip message group id will be allowed by server. 47 errProducerIDNotEqualMessageGroupID = xerrors.Wrap(errors.New("ydb: producer id not equal to message group id, use option WithMessageGroupID(producerID) for create writer")) //nolint:lll 48 ) 49 50 type WriterReconnectorConfig struct { 51 WritersCommonConfig 52 53 MaxMessageSize int 54 MaxQueueLen int 55 Common config.Common 56 AdditionalEncoders map[rawtopiccommon.Codec]PublicCreateEncoderFunc 57 Connect ConnectFunc 58 WaitServerAck bool 59 AutoSetSeqNo bool 60 AutoSetCreatedTime bool 61 OnWriterInitResponseCallback PublicOnWriterInitResponseCallback 62 RetrySettings topic.RetrySettings 63 64 connectTimeout time.Duration 65 } 66 67 func (cfg *WriterReconnectorConfig) validate() error { 68 if cfg.defaultPartitioning.Type == rawtopicwriter.PartitioningMessageGroupID && 69 cfg.producerID != cfg.defaultPartitioning.MessageGroupID { 70 return xerrors.WithStackTrace(errProducerIDNotEqualMessageGroupID) 71 } 72 73 return nil 74 } 75 76 func newWriterReconnectorConfig(options ...PublicWriterOption) WriterReconnectorConfig { 77 cfg := WriterReconnectorConfig{ 78 WritersCommonConfig: WritersCommonConfig{ 79 cred: credentials.NewAnonymousCredentials(), 80 credUpdateInterval: time.Hour, 81 clock: clockwork.NewRealClock(), 82 compressorCount: runtime.NumCPU(), 83 tracer: &trace.Topic{}, 84 }, 85 AutoSetSeqNo: true, 86 AutoSetCreatedTime: true, 87 MaxMessageSize: 50 * 1024 * 1024, 88 MaxQueueLen: 1000, 89 RetrySettings: topic.RetrySettings{ 90 StartTimeout: topic.DefaultStartTimeout, 91 }, 92 } 93 if cfg.compressorCount == 0 { 94 cfg.compressorCount = 1 95 } 96 97 for _, f := range options { 98 f(&cfg) 99 } 100 101 if cfg.connectTimeout == 0 { 102 cfg.connectTimeout = cfg.Common.OperationTimeout() 103 } 104 105 if cfg.connectTimeout == 0 { 106 cfg.connectTimeout = value.InfiniteDuration 107 } 108 109 if cfg.producerID == "" { 110 WithProducerID(uuid.NewString())(&cfg) 111 } 112 113 return cfg 114 } 115 116 type WriterReconnector struct { 117 cfg WriterReconnectorConfig 118 queue messageQueue 119 background background.Worker 120 retrySettings topic.RetrySettings 121 clock clockwork.Clock 122 writerInstanceID string 123 sessionID string 124 semaphore *semaphore.Weighted 125 firstInitResponseProcessedChan empty.Chan 126 lastSeqNo int64 127 encodersMap *EncoderMap 128 initDoneCh empty.Chan 129 initInfo InitialInfo 130 m xsync.RWMutex 131 firstConnectionHandled atomic.Bool 132 initDone bool 133 } 134 135 func newWriterReconnector( 136 cfg WriterReconnectorConfig, //nolint:gocritic 137 ) *WriterReconnector { 138 res := newWriterReconnectorStopped(cfg) 139 res.start() 140 141 return res 142 } 143 144 func newWriterReconnectorStopped( 145 cfg WriterReconnectorConfig, //nolint:gocritic 146 ) *WriterReconnector { 147 writerInstanceID, _ := rand.Int(rand.Reader, big.NewInt(math.MaxInt64)) 148 res := &WriterReconnector{ 149 cfg: cfg, 150 semaphore: semaphore.NewWeighted(int64(cfg.MaxQueueLen)), 151 queue: newMessageQueue(), 152 clock: clockwork.NewRealClock(), 153 lastSeqNo: -1, 154 firstInitResponseProcessedChan: make(empty.Chan), 155 encodersMap: NewEncoderMap(), 156 writerInstanceID: writerInstanceID.String(), 157 retrySettings: cfg.RetrySettings, 158 } 159 160 res.queue.OnAckReceived = res.onAckReceived 161 162 for codec, creator := range cfg.AdditionalEncoders { 163 res.encodersMap.AddEncoder(codec, creator) 164 } 165 166 res.sessionID = "not-connected-" + writerInstanceID.String() 167 168 res.initDoneCh = make(empty.Chan) 169 170 return res 171 } 172 173 func (w *WriterReconnector) fillFields(messages []messageWithDataContent) error { 174 var now time.Time 175 176 for i := range messages { 177 msg := &messages[i] 178 179 // SetSeqNo 180 if w.cfg.AutoSetSeqNo { 181 if msg.SeqNo != 0 { 182 return xerrors.WithStackTrace(errNonZeroSeqNo) 183 } 184 w.lastSeqNo++ 185 msg.SeqNo = w.lastSeqNo 186 } 187 188 // Set created time 189 if w.cfg.AutoSetCreatedTime { 190 if msg.CreatedAt.IsZero() { 191 if now.IsZero() { 192 now = w.clock.Now() 193 } 194 msg.CreatedAt = now 195 } else { 196 return xerrors.WithStackTrace(errNonZeroCreatedAt) 197 } 198 } 199 } 200 201 return nil 202 } 203 204 func (w *WriterReconnector) start() { 205 name := fmt.Sprintf("writer %q", w.cfg.topic) 206 w.background.Start(name+", sendloop", w.connectionLoop) 207 } 208 209 func (w *WriterReconnector) Write(ctx context.Context, messages []PublicMessage) error { 210 if err := w.background.CloseReason(); err != nil { 211 return xerrors.WithStackTrace(fmt.Errorf("ydb: writer is closed: %w", err)) 212 } 213 if ctx.Err() != nil { 214 return ctx.Err() 215 } 216 if len(messages) == 0 { 217 return nil 218 } 219 220 semaphoreWeight := int64(len(messages)) 221 if semaphoreWeight > int64(w.cfg.MaxQueueLen) { 222 return xerrors.WithStackTrace(fmt.Errorf( 223 "ydb: add more messages, then max queue limit. max queue: %v, try to add: %v: %w", 224 w.cfg.MaxQueueLen, 225 semaphoreWeight, 226 PublicErrQueueIsFull, 227 )) 228 } 229 if err := w.semaphore.Acquire(ctx, semaphoreWeight); err != nil { 230 return xerrors.WithStackTrace( 231 fmt.Errorf("ydb: add new messages exceed max queue size limit. Add count: %v, max size: %v: %w", 232 semaphoreWeight, 233 w.cfg.MaxQueueLen, 234 PublicErrQueueIsFull, 235 )) 236 } 237 defer func() { 238 w.semaphore.Release(semaphoreWeight) 239 }() 240 241 messagesSlice, err := w.createMessagesWithContent(messages) 242 if err != nil { 243 return err 244 } 245 246 if err = w.checkMessages(messagesSlice); err != nil { 247 return err 248 } 249 250 if err = w.waitFirstInitResponse(ctx); err != nil { 251 return err 252 } 253 254 var waiter MessageQueueAckWaiter 255 w.m.WithLock(func() { 256 // need set numbers and add to queue atomically 257 err = w.fillFields(messagesSlice) 258 if err != nil { 259 return 260 } 261 262 if w.cfg.WaitServerAck { 263 waiter, err = w.queue.AddMessagesWithWaiter(messagesSlice) 264 } else { 265 err = w.queue.AddMessages(messagesSlice) 266 } 267 if err == nil { 268 // move semaphore weight to queue 269 semaphoreWeight = 0 270 } 271 }) 272 if err != nil { 273 return err 274 } 275 276 if !w.cfg.WaitServerAck { 277 return nil 278 } 279 280 return w.queue.Wait(ctx, waiter) 281 } 282 283 func (w *WriterReconnector) checkMessages(messages []messageWithDataContent) error { 284 for i := range messages { 285 size := messages[i].BufUncompressedSize 286 if size > w.cfg.MaxMessageSize { 287 return xerrors.WithStackTrace(fmt.Errorf("message size bytes %v: %w", size, errLargeMessage)) 288 } 289 } 290 291 return nil 292 } 293 294 func (w *WriterReconnector) createMessagesWithContent(messages []PublicMessage) ([]messageWithDataContent, error) { 295 res := make([]messageWithDataContent, 0, len(messages)) 296 for i := range messages { 297 mess := newMessageDataWithContent(messages[i], w.encodersMap) 298 res = append(res, mess) 299 } 300 301 var sessionID string 302 w.m.WithRLock(func() { 303 sessionID = w.sessionID 304 }) 305 onCompressDone := trace.TopicOnWriterCompressMessages( 306 w.cfg.tracer, 307 w.writerInstanceID, 308 sessionID, 309 w.cfg.forceCodec.ToInt32(), 310 messages[0].SeqNo, 311 len(messages), 312 trace.TopicWriterCompressMessagesReasonCompressDataOnWriteReadData, 313 ) 314 315 targetCodec := w.cfg.forceCodec 316 if targetCodec == rawtopiccommon.CodecUNSPECIFIED { 317 targetCodec = rawtopiccommon.CodecRaw 318 } 319 err := cacheMessages(res, targetCodec, w.cfg.compressorCount) 320 onCompressDone(err) 321 if err != nil { 322 return nil, err 323 } 324 325 return res, nil 326 } 327 328 func (w *WriterReconnector) Close(ctx context.Context) error { 329 return w.close(ctx, xerrors.WithStackTrace(errStopWriterReconnector)) 330 } 331 332 func (w *WriterReconnector) close(ctx context.Context, reason error) (resErr error) { 333 onDone := trace.TopicOnWriterClose(w.cfg.tracer, w.writerInstanceID, reason) 334 defer func() { 335 onDone(resErr) 336 }() 337 338 resErr = w.queue.Close(reason) 339 bgErr := w.background.Close(ctx, reason) 340 if resErr == nil { 341 resErr = bgErr 342 } 343 344 return resErr 345 } 346 347 func (w *WriterReconnector) connectionLoop(ctx context.Context) { 348 doneCtx := ctx.Done() 349 attempt := 0 350 351 createStreamContext := func() (context.Context, context.CancelFunc) { 352 // need suppress parent context cancelation for flush buffer while close writer 353 return xcontext.WithCancel(xcontext.WithoutDeadline(ctx)) 354 } 355 356 //nolint:ineffassign,staticcheck,wastedassign 357 streamCtx, streamCtxCancel := createStreamContext() 358 359 defer streamCtxCancel() 360 361 var reconnectReason error 362 var prevAttemptTime time.Time 363 var startOfRetries time.Time 364 365 for { 366 if ctx.Err() != nil { 367 return 368 } 369 370 streamCtxCancel() 371 streamCtx, streamCtxCancel = createStreamContext() 372 373 now := time.Now() 374 if startOfRetries.IsZero() || topic.CheckResetReconnectionCounters(prevAttemptTime, now, w.cfg.connectTimeout) { 375 attempt = 0 376 startOfRetries = w.clock.Now() 377 } else { 378 attempt++ 379 } 380 prevAttemptTime = now 381 382 if reconnectReason != nil { 383 if backoff, retry := topic.CheckRetryMode(reconnectReason, w.retrySettings, w.clock.Since(startOfRetries)); retry { 384 delay := backoff.Delay(attempt) 385 select { 386 case <-doneCtx: 387 return 388 case <-w.clock.After(delay): 389 // pass 390 } 391 } else { 392 _ = w.close(ctx, reconnectReason) 393 394 return 395 } 396 } 397 398 writer, err := w.startWriteStream(ctx, streamCtx, attempt) 399 w.onWriterChange(writer) 400 if err == nil { 401 reconnectReason = writer.WaitClose(ctx) 402 startOfRetries = time.Now() 403 } else { 404 reconnectReason = err 405 } 406 } 407 } 408 409 func (w *WriterReconnector) startWriteStream(ctx, streamCtx context.Context, attempt int) ( 410 writer *SingleStreamWriter, 411 err error, 412 ) { 413 traceOnDone := trace.TopicOnWriterReconnect( 414 w.cfg.tracer, 415 w.writerInstanceID, 416 w.cfg.topic, 417 w.cfg.producerID, 418 attempt, 419 ) 420 defer func() { 421 traceOnDone(err) 422 }() 423 424 stream, err := w.connectWithTimeout(streamCtx) 425 if err != nil { 426 return nil, err 427 } 428 429 w.queue.ResetSentProgress() 430 431 return NewSingleStreamWriter(ctx, w.createWriterStreamConfig(stream)) 432 } 433 434 func (w *WriterReconnector) needReceiveLastSeqNo() bool { 435 res := !w.firstConnectionHandled.Load() 436 437 return res 438 } 439 440 func (w *WriterReconnector) connectWithTimeout(streamLifetimeContext context.Context) (RawTopicWriterStream, error) { 441 connectCtx, connectCancel := xcontext.WithCancel(streamLifetimeContext) 442 443 type resT struct { 444 stream RawTopicWriterStream 445 err error 446 } 447 resCh := make(chan resT, 1) 448 449 go func() { 450 defer func() { 451 p := recover() 452 if p != nil { 453 resCh <- resT{ 454 stream: nil, 455 err: xerrors.WithStackTrace(xerrors.Wrap(fmt.Errorf("ydb: panic while connect to topic writer: %+v", p))), 456 } 457 } 458 }() 459 460 stream, err := w.cfg.Connect(connectCtx) 461 resCh <- resT{stream: stream, err: err} 462 }() 463 464 timer := time.NewTimer(w.cfg.connectTimeout) 465 defer timer.Stop() 466 467 select { 468 case <-timer.C: 469 connectCancel() 470 471 return nil, xerrors.WithStackTrace(errConnTimeout) 472 case res := <-resCh: 473 // force no cancel connect context - because it will break stream 474 // context will cancel by cancel streamLifetimeContext while reconnect or stop connection 475 _ = connectCancel 476 477 return res.stream, res.err 478 } 479 } 480 481 func (w *WriterReconnector) onAckReceived(count int) { 482 w.semaphore.Release(int64(count)) 483 } 484 485 func (w *WriterReconnector) onWriterChange(writerStream *SingleStreamWriter) { 486 isFirstInit := false 487 w.m.WithLock(func() { 488 if writerStream == nil { 489 w.sessionID = "" 490 491 return 492 } 493 w.sessionID = writerStream.SessionID 494 495 if !w.firstConnectionHandled.CompareAndSwap(false, true) { 496 return 497 } 498 defer close(w.firstInitResponseProcessedChan) 499 isFirstInit = true 500 501 if writerStream.LastSeqNumRequested { 502 w.lastSeqNo = writerStream.ReceivedLastSeqNum 503 } 504 }) 505 506 if isFirstInit { 507 w.m.WithLock(func() { 508 w.initDone = true 509 w.initInfo = InitialInfo{LastSeqNum: w.lastSeqNo} 510 close(w.initDoneCh) 511 }) 512 w.onWriterInitCallbackHandler(writerStream) 513 } 514 } 515 516 func (w *WriterReconnector) WaitInit(ctx context.Context) (info InitialInfo, err error) { 517 if ctx.Err() != nil { 518 return InitialInfo{}, ctx.Err() 519 } 520 521 select { 522 case <-ctx.Done(): 523 return InitialInfo{}, ctx.Err() 524 case <-w.initDoneCh: 525 return w.initInfo, nil 526 } 527 } 528 529 func (w *WriterReconnector) onWriterInitCallbackHandler(writerStream *SingleStreamWriter) { 530 if w.cfg.OnWriterInitResponseCallback != nil { 531 info := PublicWithOnWriterConnectedInfo{ 532 LastSeqNo: w.lastSeqNo, 533 SessionID: w.sessionID, 534 PartitionID: writerStream.PartitionID, 535 CodecsFromServer: createPublicCodecsFromRaw(writerStream.CodecsFromServer), 536 } 537 538 if err := w.cfg.OnWriterInitResponseCallback(info); err != nil { 539 _ = w.close(context.Background(), fmt.Errorf("OnWriterInitResponseCallback return error: %w", err)) 540 } 541 } 542 } 543 544 func (w *WriterReconnector) waitFirstInitResponse(ctx context.Context) error { 545 if err := ctx.Err(); err != nil { 546 return err 547 } 548 549 if w.firstConnectionHandled.Load() { 550 return nil 551 } 552 553 select { 554 case <-w.background.Done(): 555 return w.background.CloseReason() 556 case <-w.firstInitResponseProcessedChan: 557 return nil 558 case <-ctx.Done(): 559 return ctx.Err() 560 } 561 } 562 563 func (w *WriterReconnector) createWriterStreamConfig(stream RawTopicWriterStream) SingleStreamWriterConfig { 564 cfg := newSingleStreamWriterConfig( 565 w.cfg.WritersCommonConfig, 566 stream, 567 &w.queue, 568 w.encodersMap, 569 w.needReceiveLastSeqNo(), 570 w.writerInstanceID, 571 ) 572 573 return cfg 574 } 575 576 func sendMessagesToStream( 577 stream RawTopicWriterStream, 578 targetCodec rawtopiccommon.Codec, 579 messages []messageWithDataContent, 580 ) error { 581 if len(messages) == 0 { 582 return nil 583 } 584 585 request, err := createWriteRequest(messages, targetCodec) 586 if err != nil { 587 return err 588 } 589 err = stream.Send(&request) 590 if err != nil { 591 return xerrors.WithStackTrace(fmt.Errorf("ydb: failed send write request: %w", err)) 592 } 593 594 return nil 595 } 596 597 func allMessagesHasSameBufCodec(messages []messageWithDataContent) bool { 598 if len(messages) <= 1 { 599 return true 600 } 601 602 codec := messages[0].bufCodec 603 for i := range messages { 604 if messages[i].bufCodec != codec { 605 return false 606 } 607 } 608 609 return true 610 } 611 612 func splitMessagesByBufCodec(messages []messageWithDataContent) (res [][]messageWithDataContent) { 613 if len(messages) == 0 { 614 return nil 615 } 616 617 currentGroupStart := 0 618 currentCodec := messages[0].bufCodec 619 for i := range messages { 620 if messages[i].bufCodec != currentCodec { 621 res = append(res, messages[currentGroupStart:i:i]) 622 currentGroupStart = i 623 currentCodec = messages[i].bufCodec 624 } 625 } 626 res = append(res, messages[currentGroupStart:len(messages):len(messages)]) 627 628 return res 629 } 630 631 func createWriteRequest(messages []messageWithDataContent, targetCodec rawtopiccommon.Codec) ( 632 res rawtopicwriter.WriteRequest, 633 err error, 634 ) { 635 res.Codec = targetCodec 636 res.Messages = make([]rawtopicwriter.MessageData, len(messages)) 637 for i := range messages { 638 res.Messages[i], err = createRawMessageData(res.Codec, &messages[i]) 639 if err != nil { 640 return res, err 641 } 642 } 643 644 return res, nil 645 } 646 647 func createRawMessageData( 648 codec rawtopiccommon.Codec, 649 mess *messageWithDataContent, 650 ) (res rawtopicwriter.MessageData, err error) { 651 res.CreatedAt = mess.CreatedAt 652 res.SeqNo = mess.SeqNo 653 654 switch { 655 case mess.futurePartitioning.hasPartitionID: 656 res.Partitioning.Type = rawtopicwriter.PartitioningPartitionID 657 res.Partitioning.PartitionID = mess.futurePartitioning.partitionID 658 case mess.futurePartitioning.messageGroupID != "": 659 res.Partitioning.Type = rawtopicwriter.PartitioningMessageGroupID 660 res.Partitioning.MessageGroupID = mess.futurePartitioning.messageGroupID 661 default: 662 // pass 663 } 664 665 res.UncompressedSize = int64(mess.BufUncompressedSize) 666 res.Data, err = mess.GetEncodedBytes(codec) 667 668 if len(mess.Metadata) > 0 { 669 res.MetadataItems = make([]rawtopiccommon.MetadataItem, 0, len(mess.Metadata)) 670 for key, val := range mess.Metadata { 671 res.MetadataItems = append(res.MetadataItems, rawtopiccommon.MetadataItem{ 672 Key: key, 673 Value: val, 674 }) 675 } 676 } 677 678 return res, err 679 } 680 681 func calculateAllowedCodecs(forceCodec rawtopiccommon.Codec, encoderMap *EncoderMap, 682 serverCodecs rawtopiccommon.SupportedCodecs, 683 ) rawtopiccommon.SupportedCodecs { 684 if forceCodec != rawtopiccommon.CodecUNSPECIFIED { 685 if serverCodecs.AllowedByCodecsList(forceCodec) && encoderMap.IsSupported(forceCodec) { 686 return rawtopiccommon.SupportedCodecs{forceCodec} 687 } 688 689 return nil 690 } 691 692 if len(serverCodecs) == 0 { 693 // fixed list for autoselect codec if empty server list for prevent unexpectedly add messages with new codec 694 // with sdk update 695 serverCodecs = rawtopiccommon.SupportedCodecs{rawtopiccommon.CodecRaw, rawtopiccommon.CodecGzip} 696 } 697 698 res := make(rawtopiccommon.SupportedCodecs, 0, len(serverCodecs)) 699 for _, codec := range serverCodecs { 700 if encoderMap.IsSupported(codec) { 701 res = append(res, codec) 702 } 703 } 704 if len(res) == 0 { 705 res = nil 706 } 707 708 return res 709 } 710 711 type ConnectFunc func(ctx context.Context) (RawTopicWriterStream, error) 712 713 func createPublicCodecsFromRaw(codecs rawtopiccommon.SupportedCodecs) []topictypes.Codec { 714 res := make([]topictypes.Codec, len(codecs)) 715 for i, v := range codecs { 716 res[i] = topictypes.Codec(v) 717 } 718 719 return res 720 }