github.com/hack0072008/kafka-go@v1.0.1/writer.go

package kafka

import (
	"bytes"
	"context"
	"errors"
	"io"
	"net"
	"sync"
	"sync/atomic"
	"time"

	metadataAPI "github.com/hack0072008/kafka-go/protocol/metadata"
)

// The Writer type provides the implementation of a producer of kafka messages
// that automatically distributes messages across partitions of a single topic
// using a configurable balancing policy.
//
// Writers manage the dispatch of messages across partitions of the topic they
// are configured to write to using a Balancer, and aggregate batches to
// optimize the writes to kafka.
//
// Writers may be configured to be used synchronously or asynchronously. When
// used synchronously, calls to WriteMessages block until the messages have been
// written to kafka. In this mode, the program should inspect the error returned
// by the function and test if it is an instance of kafka.WriteErrors in order
// to identify which messages have succeeded or failed, for example:
//
//	// Construct a synchronous writer (the default mode).
//	w := &kafka.Writer{
//		Addr:         kafka.TCP("localhost:9092"),
//		Topic:        "topic-A",
//		RequiredAcks: kafka.RequireAll,
//	}
//
//	...
//
//	// Passing a context can prevent the operation from blocking indefinitely.
//	switch err := w.WriteMessages(ctx, msgs...).(type) {
//	case nil:
//	case kafka.WriteErrors:
//		for i := range msgs {
//			if err[i] != nil {
//				// handle the error writing msgs[i]
//				...
//			}
//		}
//	default:
//		// handle other errors
//		...
//	}
//
// In asynchronous mode, the program may configure a completion handler on the
// writer to receive notifications of messages being written to kafka:
//
//	w := &kafka.Writer{
//		Addr:         kafka.TCP("localhost:9092"),
//		Topic:        "topic-A",
//		RequiredAcks: kafka.RequireAll,
//		Async:        true, // make the writer asynchronous
//		Completion: func(messages []kafka.Message, err error) {
//			...
//		},
//	}
//
//	...
//
//	// Because the writer is asynchronous, there is no need for the context to
//	// be cancelled, the call will never block.
//	if err := w.WriteMessages(context.Background(), msgs...); err != nil {
//		// Only validation errors would be reported in this case.
//		...
//	}
//
// Methods of Writer are safe to use concurrently from multiple goroutines,
// however the writer configuration should not be modified after first use.
type Writer struct {
	// Address of the kafka cluster that this writer is configured to send
	// messages to.
	//
	// This field is required, attempting to write messages to a writer with a
	// nil address will error.
	Addr net.Addr

	// Topic is the name of the topic that the writer will produce messages to.
	//
	// This field and Message.Topic are mutually exclusive: if Topic is set
	// here, no produced Message may carry its own Topic; if it is left empty,
	// every Message must specify one.
	Topic string
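	// For example, a writer configured without a Topic can produce to several
	// topics by setting Message.Topic instead (an illustrative sketch, not
	// part of the original source):
	//
	//	w := &kafka.Writer{Addr: kafka.TCP("localhost:9092")}
	//	err := w.WriteMessages(ctx,
	//		kafka.Message{Topic: "topic-A", Value: []byte("a")},
	//		kafka.Message{Topic: "topic-B", Value: []byte("b")},
	//	)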

	// The balancer used to distribute messages across partitions.
	//
	// The default is to use a round-robin distribution.
	Balancer Balancer
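	// For example, a custom Balancer could route messages on a header value
	// (an illustrative sketch, assuming a "shard" header; RoundRobin is the
	// package's default balancer):
	//
	//	type headerBalancer struct{ fallback kafka.RoundRobin }
	//
	//	func (b *headerBalancer) Balance(msg kafka.Message, partitions ...int) int {
	//		for _, h := range msg.Headers {
	//			if h.Key == "shard" && len(h.Value) > 0 {
	//			 	return partitions[int(h.Value[0])%len(partitions)]
	//			}
	//		}
	//		return b.fallback.Balance(msg, partitions...)
	//	}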

	// Limit on how many attempts will be made to deliver a message.
	//
	// The default is to try at most 10 times.
	MaxAttempts int

	// Limit on how many messages will be buffered before being sent to a
	// partition.
	//
	// The default is to use a target batch size of 100 messages.
	BatchSize int

	// Limit the maximum size of a request in bytes before being sent to
	// a partition.
	//
	// The default is to use a kafka default value of 1048576.
	BatchBytes int64

	// Time limit on how often incomplete message batches will be flushed to
	// kafka.
	//
	// The default is to flush at least every second.
	BatchTimeout time.Duration

	// Timeout for read operations performed by the Writer.
	//
	// Defaults to 10 seconds.
	ReadTimeout time.Duration

	// Timeout for write operations performed by the Writer.
	//
	// Defaults to 10 seconds.
	WriteTimeout time.Duration

	// Number of acknowledgements from partition replicas required before
	// receiving a response to a produce request. The following values are
	// supported:
	//
	//	RequireNone (0)  fire-and-forget, do not wait for acknowledgements from the cluster
	//	RequireOne  (1)  wait for the leader to acknowledge the writes
	//	RequireAll  (-1) wait for the full ISR to acknowledge the writes
	//
	// Defaults to RequireNone.
	RequiredAcks RequiredAcks

	// Setting this flag to true causes the WriteMessages method to never block.
	// It also means that errors are ignored since the caller will not receive
	// the returned value. Use this only if you don't care about guarantees of
	// whether the messages were written to kafka.
	//
	// Defaults to false.
	Async bool

	// An optional function called when the writer succeeds or fails the
	// delivery of messages to a kafka partition. When writing the messages
	// fails, the `err` parameter will be non-nil.
	//
	// The messages that the Completion function is called with have their
	// topic, partition, offset, and time set based on the Produce responses
	// received from kafka. All messages passed to a call to the function have
	// been written to the same partition. The keys and values of messages are
	// referencing the original byte slices carried by messages in the calls to
	// WriteMessages.
	//
	// The function is called from goroutines started by the writer. Calls to
	// Close will block on the Completion function calls. When the Writer is
	// not writing asynchronously, the WriteMessages call will also block on
	// Completion function calls, which is a useful guarantee if the byte
	// slices for the message keys and values are intended to be reused after
	// the WriteMessages call returned.
	//
	// If a completion function panics, the program terminates because the
	// panic is not recovered by the writer and bubbles up to the top of the
	// goroutine's call stack.
	Completion func(messages []Message, err error)
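	// For example, a Completion function that counts failed deliveries
	// (an illustrative sketch, assuming a package-level counter updated
	// with sync/atomic):
	//
	//	var failed int64
	//
	//	w := &kafka.Writer{
	//		Addr:  kafka.TCP("localhost:9092"),
	//		Topic: "topic-A",
	//		Async: true,
	//		Completion: func(messages []kafka.Message, err error) {
	//			if err != nil {
	//				atomic.AddInt64(&failed, int64(len(messages)))
	//			}
	//		},
	//	}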

	// Compression sets the compression codec to be used to compress messages.
	Compression Compression

	// If not nil, specifies a logger used to report internal changes within the
	// writer.
	Logger Logger

	// ErrorLogger is the logger used to report errors. If nil, the writer falls
	// back to using Logger instead.
	ErrorLogger Logger

	// A transport used to send messages to kafka clusters.
	//
	// If nil, DefaultTransport is used.
	Transport RoundTripper

	// Manages the current set of topic-partition writers.
	group   sync.WaitGroup
	mutex   sync.Mutex
	closed  bool
	writers map[topicPartition]*partitionWriter

	// writer stats are all made of atomic values, no need for synchronization.
	// Use a pointer to ensure 64-bit alignment of the values. The once value is
	// used to lazily create the value when first used, allowing programs to use
	// the zero value of Writer.
	once sync.Once
	*writerStats

	// If no balancer is configured, the writer uses this one. RoundRobin values
	// are safe to use concurrently from multiple goroutines, there is no need
	// for extra synchronization to access this field.
	roundRobin RoundRobin

	// non-nil when a transport was created by NewWriter, remove in 1.0.
	transport *Transport
}

// WriterConfig is a configuration type used to create new instances of Writer.
//
// DEPRECATED: writer values should be configured directly by assigning their
// exported fields. This type is kept for backward compatibility, and will be
// removed in version 1.0.
type WriterConfig struct {
	// The list of brokers used to discover the partitions available on the
	// kafka cluster.
	//
	// This field is required, attempting to create a writer with an empty list
	// of brokers will panic.
	Brokers []string

	// The topic that the writer will produce messages to.
	//
	// If provided, this will be used to set the topic for all produced messages.
	// If not provided, each Message must specify a topic for itself. The two
	// options are mutually exclusive: if both a writer topic and a message
	// topic are set, the Writer returns an error.
	Topic string

	// The dialer used by the writer to establish connections to the kafka
	// cluster.
	//
	// If nil, the default dialer is used instead.
	Dialer *Dialer

	// The balancer used to distribute messages across partitions.
	//
	// The default is to use a round-robin distribution.
	Balancer Balancer

	// Limit on how many attempts will be made to deliver a message.
	//
	// The default is to try at most 10 times.
	MaxAttempts int

	// DEPRECATED: in versions prior to 0.4, the writer used channels internally
	// to dispatch messages to partitions. This has been replaced by an in-memory
	// aggregation of batches which uses shared state instead of message passing,
	// making this option unnecessary.
	QueueCapacity int

	// Limit on how many messages will be buffered before being sent to a
	// partition.
	//
	// The default is to use a target batch size of 100 messages.
	BatchSize int

	// Limit the maximum size of a request in bytes before being sent to
	// a partition.
	//
	// The default is to use a kafka default value of 1048576.
	BatchBytes int

	// Time limit on how often incomplete message batches will be flushed to
	// kafka.
	//
	// The default is to flush at least every second.
	BatchTimeout time.Duration

	// Timeout for read operations performed by the Writer.
	//
	// Defaults to 10 seconds.
	ReadTimeout time.Duration

	// Timeout for write operations performed by the Writer.
	//
	// Defaults to 10 seconds.
	WriteTimeout time.Duration

	// DEPRECATED: in versions prior to 0.4, the writer used to maintain a cache
	// of the topic layout. With the change to use a transport to manage
	// connections, the responsibility of syncing the cluster layout has been
	// delegated to the transport.
	RebalanceInterval time.Duration

	// DEPRECATED: in versions prior to 0.4, the writer used to manage connections
	// to the kafka cluster directly. With the change to use a transport to manage
	// connections, the writer has no connections to manage directly anymore.
	IdleConnTimeout time.Duration

	// Number of acknowledges from partition replicas required before receiving
	// a response to a produce request. The default is -1, which means to wait
	// for all replicas, and a value above 0 is required to indicate how many
	// replicas should acknowledge a message to be considered successful.
	//
	// This version of kafka-go (v0.3) does not support 0 required acks, due to
	// some internal complexity implementing this with the Kafka protocol. If you
	// need that functionality specifically, you'll need to upgrade to v0.4.
	RequiredAcks int

	// Setting this flag to true causes the WriteMessages method to never block.
	// It also means that errors are ignored since the caller will not receive
	// the returned value. Use this only if you don't care about guarantees of
	// whether the messages were written to kafka.
	Async bool

	// CompressionCodec sets the codec to be used to compress Kafka messages.
	CompressionCodec

	// If not nil, specifies a logger used to report internal changes within the
	// writer.
	Logger Logger

	// ErrorLogger is the logger used to report errors. If nil, the writer falls
	// back to using Logger instead.
	ErrorLogger Logger
}

type topicPartition struct {
	topic     string
	partition int32
}

// Validate method validates WriterConfig properties.
func (config *WriterConfig) Validate() error {
	if len(config.Brokers) == 0 {
		return errors.New("cannot create a kafka writer with an empty list of brokers")
	}
	return nil
}
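// For example, the deprecated configuration path validated above looks like
// this (an illustrative sketch; LeastBytes is one of the package's balancers):
//
//	w := kafka.NewWriter(kafka.WriterConfig{
//		Brokers:  []string{"localhost:9092"},
//		Topic:    "topic-A",
//		Balancer: &kafka.LeastBytes{},
//	})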

// WriterStats is a data structure returned by a call to Writer.Stats that
// exposes details about the behavior of the writer.
type WriterStats struct {
	Writes   int64 `metric:"kafka.writer.write.count"   type:"counter"`
	Messages int64 `metric:"kafka.writer.message.count" type:"counter"`
	Bytes    int64 `metric:"kafka.writer.message.bytes" type:"counter"`
	Errors   int64 `metric:"kafka.writer.error.count"   type:"counter"`

	BatchTime  DurationStats `metric:"kafka.writer.batch.seconds"`
	WriteTime  DurationStats `metric:"kafka.writer.write.seconds"`
	WaitTime   DurationStats `metric:"kafka.writer.wait.seconds"`
	Retries    SummaryStats  `metric:"kafka.writer.retries.count"`
	BatchSize  SummaryStats  `metric:"kafka.writer.batch.size"`
	BatchBytes SummaryStats  `metric:"kafka.writer.batch.bytes"`

	MaxAttempts  int64         `metric:"kafka.writer.attempts.max"  type:"gauge"`
	MaxBatchSize int64         `metric:"kafka.writer.batch.max"     type:"gauge"`
	BatchTimeout time.Duration `metric:"kafka.writer.batch.timeout" type:"gauge"`
	ReadTimeout  time.Duration `metric:"kafka.writer.read.timeout"  type:"gauge"`
	WriteTimeout time.Duration `metric:"kafka.writer.write.timeout" type:"gauge"`
	RequiredAcks int64         `metric:"kafka.writer.acks.required" type:"gauge"`
	Async        bool          `metric:"kafka.writer.async"         type:"gauge"`

	Topic string `tag:"topic"`

	// DEPRECATED: these fields will only be reported for backward compatibility
	// if the Writer was constructed with NewWriter.
	Dials    int64         `metric:"kafka.writer.dial.count" type:"counter"`
	DialTime DurationStats `metric:"kafka.writer.dial.seconds"`

	// DEPRECATED: these fields were meaningful prior to kafka-go 0.4, changes
	// to the internal implementation and the introduction of the transport type
	// made them unnecessary.
	//
	// The values will be zero but are left for backward compatibility to avoid
	// breaking programs that used these fields.
	Rebalances        int64
	RebalanceInterval time.Duration
	QueueLength       int64
	QueueCapacity     int64
	ClientID          string
}

// writerStats is a struct that contains statistics on a writer.
//
// Since atomic is used to mutate the statistics the values must be 64-bit
// aligned. This is easily accomplished by always allocating this struct
// directly (i.e. using a pointer to the struct).
// See https://golang.org/pkg/sync/atomic/#pkg-note-BUG
type writerStats struct {
	dials          counter
	writes         counter
	messages       counter
	bytes          counter
	errors         counter
	dialTime       summary
	batchTime      summary
	writeTime      summary
	waitTime       summary
	retries        summary
	batchSize      summary
	batchSizeBytes summary
}

// NewWriter creates and returns a new Writer configured with config.
//
// DEPRECATED: Writer values can be instantiated and configured directly,
// this function is retained for backward compatibility and will be removed
// in version 1.0.
func NewWriter(config WriterConfig) *Writer {
	if err := config.Validate(); err != nil {
		panic(err)
	}

	if config.Dialer == nil {
		config.Dialer = DefaultDialer
	}

	if config.Balancer == nil {
		config.Balancer = &RoundRobin{}
	}

	// Converts the pre-0.4 Dialer API into a Transport.
	kafkaDialer := DefaultDialer
	if config.Dialer != nil {
		kafkaDialer = config.Dialer
	}

	dialer := (&net.Dialer{
		Timeout:       kafkaDialer.Timeout,
		Deadline:      kafkaDialer.Deadline,
		LocalAddr:     kafkaDialer.LocalAddr,
		DualStack:     kafkaDialer.DualStack,
		FallbackDelay: kafkaDialer.FallbackDelay,
		KeepAlive:     kafkaDialer.KeepAlive,
	})

	var resolver Resolver
	if r, ok := kafkaDialer.Resolver.(*net.Resolver); ok {
		dialer.Resolver = r
	} else {
		resolver = kafkaDialer.Resolver
	}

	stats := new(writerStats)
	// For backward compatibility with the pre-0.4 APIs, support custom
	// resolvers by wrapping the dial function.
	dial := func(ctx context.Context, network, addr string) (net.Conn, error) {
		start := time.Now()
		defer func() {
			stats.dials.observe(1)
			stats.dialTime.observe(int64(time.Since(start)))
		}()
		address, err := lookupHost(ctx, addr, resolver)
		if err != nil {
			return nil, err
		}
		return dialer.DialContext(ctx, network, address)
	}

	idleTimeout := config.IdleConnTimeout
	if idleTimeout == 0 {
		// Historical default value of WriterConfig.IdleConnTimeout, 9 minutes
		// seems like it is way too long when there is no ping mechanism in the
		// kafka protocol.
		idleTimeout = 9 * time.Minute
	}

	metadataTTL := config.RebalanceInterval
	if metadataTTL == 0 {
		// Historical default value of WriterConfig.RebalanceInterval.
		metadataTTL = 15 * time.Second
	}

	transport := &Transport{
		Dial:        dial,
		SASL:        kafkaDialer.SASLMechanism,
		TLS:         kafkaDialer.TLS,
		ClientID:    kafkaDialer.ClientID,
		IdleTimeout: idleTimeout,
		MetadataTTL: metadataTTL,
	}

	w := &Writer{
		Addr:         TCP(config.Brokers...),
		Topic:        config.Topic,
		MaxAttempts:  config.MaxAttempts,
		BatchSize:    config.BatchSize,
		Balancer:     config.Balancer,
		BatchBytes:   int64(config.BatchBytes),
		BatchTimeout: config.BatchTimeout,
		ReadTimeout:  config.ReadTimeout,
		WriteTimeout: config.WriteTimeout,
		RequiredAcks: RequiredAcks(config.RequiredAcks),
		Async:        config.Async,
		Logger:       config.Logger,
		ErrorLogger:  config.ErrorLogger,
		Transport:    transport,
		transport:    transport,
		writerStats:  stats,
	}

	if config.RequiredAcks == 0 {
		// Historically the writers created by NewWriter have used "all" as the
		// default value when 0 was specified.
		w.RequiredAcks = RequireAll
	}

	if config.CompressionCodec != nil {
		w.Compression = Compression(config.CompressionCodec.Code())
	}

	return w
}

// enter is called by WriteMessages to indicate that a new inflight operation
// has started, which helps synchronize with Close and ensure that the method
// does not return until all inflight operations were completed.
func (w *Writer) enter() bool {
	w.mutex.Lock()
	defer w.mutex.Unlock()
	if w.closed {
		return false
	}
	w.group.Add(1)
	return true
}

// leave is called by WriteMessages to indicate that the inflight operation has
// completed.
func (w *Writer) leave() { w.group.Done() }

// spawn starts a new asynchronous operation on the writer. This method is used
// instead of starting goroutines inline to help manage the state of the
// writer's wait group. The wait group is used to block Close calls until all
// inflight operations have completed, therefore automatically including those
// started with calls to spawn.
func (w *Writer) spawn(f func()) {
	w.group.Add(1)
	go func() {
		defer w.group.Done()
		f()
	}()
}

// Close flushes pending writes, and waits for all writes to complete before
// returning. Calling Close also prevents new writes from being submitted to
// the writer, further calls to WriteMessages and the like will fail with
// io.ErrClosedPipe.
func (w *Writer) Close() error {
	w.mutex.Lock()
	// Marking the writer as closed here causes future calls to WriteMessages to
	// fail with io.ErrClosedPipe. Mutation of this field is synchronized on the
	// writer's mutex to ensure that no more increments of the wait group are
	// performed afterwards (which could otherwise race with the Wait below).
	w.closed = true

	// close all writers to trigger any pending batches
	for _, writer := range w.writers {
		writer.close()
	}

	for partition := range w.writers {
		delete(w.writers, partition)
	}

	w.mutex.Unlock()
	w.group.Wait()

	if w.transport != nil {
		w.transport.CloseIdleConnections()
	}

	return nil
}
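// For example, a typical lifecycle shares one Writer across goroutines and
// closes it on shutdown (an illustrative sketch, not part of the original
// source):
//
//	w := &kafka.Writer{Addr: kafka.TCP("localhost:9092"), Topic: "topic-A"}
//	defer w.Close()
//
//	var wg sync.WaitGroup
//	for i := 0; i < 4; i++ {
//		wg.Add(1)
//		go func() {
//			defer wg.Done()
//			_ = w.WriteMessages(ctx, kafka.Message{Value: []byte("hello")})
//		}()
//	}
//	wg.Wait()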

// WriteMessages writes a batch of messages to the kafka topic configured on this
// writer.
//
// Unless the writer was configured to write messages asynchronously, the method
// blocks until all messages have been written, or until the maximum number of
// attempts was reached.
//
// When sending synchronously and the writer's batch size is configured to be
// greater than 1, this method blocks until either a full batch can be assembled
// or the batch timeout is reached. The batch size and timeouts are evaluated
// per partition, so the choice of Balancer can also influence the flushing
// behavior. For example, the Hash balancer will require on average N * batch
// size messages to trigger a flush where N is the number of partitions. The
// best way to achieve good batching behavior is to share one Writer amongst
// multiple goroutines.
//
// When the method returns an error, it may be of type kafka.WriteErrors to
// allow the caller to determine the status of each message.
//
// The context passed as first argument may also be used to asynchronously
// cancel the operation. Note that in this case there are no guarantees made on
// whether messages were written to kafka. The program should assume that the
// whole batch failed and re-write the messages later (which could then cause
// duplicates).
func (w *Writer) WriteMessages(ctx context.Context, msgs ...Message) error {
	if w.Addr == nil {
		return errors.New("kafka.(*Writer).WriteMessages: cannot create a kafka writer with a nil address")
	}

	if !w.enter() {
		return io.ErrClosedPipe
	}
	defer w.leave()

	if len(msgs) == 0 {
		return nil
	}

	balancer := w.balancer()
	batchBytes := w.batchBytes()

	for i := range msgs {
		n := int64(msgs[i].size())
		if n > batchBytes {
			// This error is left for backward compatibility with historical
			// behavior, but it can yield O(N^2) behaviors. The expectation
			// is that the program will check if WriteMessages returned a
			// MessageTooLargeError, discard the message that was exceeding
			// the maximum size, and try again.
			return messageTooLarge(msgs, i)
		}
	}

	// We use int32 here to halve the memory footprint (compared to using int
	// on 64 bit architectures). We map lists of the message indexes instead
	// of the message values for the same reason, int32 is 4 bytes, vs a full
	// Message value which is 100+ bytes and contains pointers and contributes
	// to increasing GC work.
	assignments := make(map[topicPartition][]int32)

	for i, msg := range msgs {
		topic, err := w.chooseTopic(msg)
		if err != nil {
			return err
		}

		numPartitions, err := w.partitions(ctx, topic)
		if err != nil {
			return err
		}

		partition := balancer.Balance(msg, loadCachedPartitions(numPartitions)...)

		key := topicPartition{
			topic:     topic,
			partition: int32(partition),
		}

		assignments[key] = append(assignments[key], int32(i))
	}

	batches := w.batchMessages(msgs, assignments)
	if w.Async {
		return nil
	}

	done := ctx.Done()
	hasErrors := false
	for batch := range batches {
		select {
		case <-done:
			return ctx.Err()
		case <-batch.done:
			if batch.err != nil {
				hasErrors = true
			}
		}
	}

	if !hasErrors {
		return nil
	}

	werr := make(WriteErrors, len(msgs))

	for batch, indexes := range batches {
		for _, i := range indexes {
			werr[i] = batch.err
		}
	}
	return werr
}

func (w *Writer) batchMessages(messages []Message, assignments map[topicPartition][]int32) map[*writeBatch][]int32 {
	var batches map[*writeBatch][]int32
	if !w.Async {
		batches = make(map[*writeBatch][]int32, len(assignments))
	}

	w.mutex.Lock()
	defer w.mutex.Unlock()

	if w.writers == nil {
		w.writers = map[topicPartition]*partitionWriter{}
	}

	for key, indexes := range assignments {
		writer := w.writers[key]
		if writer == nil {
			writer = newPartitionWriter(w, key)
			w.writers[key] = writer
		}
		wbatches := writer.writeMessages(messages, indexes)

		for batch, idxs := range wbatches {
			batches[batch] = idxs
		}
	}

	return batches
}

func (w *Writer) produce(key topicPartition, batch *writeBatch) (*ProduceResponse, error) {
	timeout := w.writeTimeout()

	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()

	return w.client(timeout).Produce(ctx, &ProduceRequest{
		Partition:    int(key.partition),
		Topic:        key.topic,
		RequiredAcks: w.RequiredAcks,
		Compression:  w.Compression,
		Records: &writerRecords{
			msgs: batch.msgs,
		},
	})
}

func (w *Writer) partitions(ctx context.Context, topic string) (int, error) {
	client := w.client(w.readTimeout())
	// Here we use the transport directly as an optimization to avoid the
	// construction of temporary request and response objects made by the
	// (*Client).Metadata API.
	//
	// It is expected that the transport will optimize this request by
	// caching recent results (the kafka.Transport type does).
	r, err := client.transport().RoundTrip(ctx, client.Addr, &metadataAPI.Request{
		TopicNames:             []string{topic},
		AllowAutoTopicCreation: true,
	})
	if err != nil {
		return 0, err
	}
	for _, t := range r.(*metadataAPI.Response).Topics {
		if t.Name == topic {
			// This should always match, unless kafka has a bug.
			if t.ErrorCode != 0 {
				return 0, Error(t.ErrorCode)
			}
			return len(t.Partitions), nil
		}
	}
	return 0, UnknownTopicOrPartition
}

func (w *Writer) client(timeout time.Duration) *Client {
	return &Client{
		Addr:      w.Addr,
		Transport: w.Transport,
		Timeout:   timeout,
	}
}

func (w *Writer) balancer() Balancer {
	if w.Balancer != nil {
		return w.Balancer
	}
	return &w.roundRobin
}

func (w *Writer) maxAttempts() int {
	if w.MaxAttempts > 0 {
		return w.MaxAttempts
	}
	// TODO: this is a very high default, if something has failed 9 times it
	// seems unlikely it will succeed on the 10th attempt. However, it does
	// carry the risk of greatly increasing the volume of requests sent to the
	// kafka cluster. We should consider reducing this default (3?).
	return 10
}

func (w *Writer) batchSize() int {
	if w.BatchSize > 0 {
		return w.BatchSize
	}
	return 100
}

func (w *Writer) batchBytes() int64 {
	if w.BatchBytes > 0 {
		return w.BatchBytes
	}
	return 1048576
}

func (w *Writer) batchTimeout() time.Duration {
	if w.BatchTimeout > 0 {
		return w.BatchTimeout
	}
	return 1 * time.Second
}

func (w *Writer) readTimeout() time.Duration {
	if w.ReadTimeout > 0 {
		return w.ReadTimeout
	}
	return 10 * time.Second
}

func (w *Writer) writeTimeout() time.Duration {
	if w.WriteTimeout > 0 {
		return w.WriteTimeout
	}
	return 10 * time.Second
}

func (w *Writer) withLogger(do func(Logger)) {
	if w.Logger != nil {
		do(w.Logger)
	}
}

func (w *Writer) withErrorLogger(do func(Logger)) {
	if w.ErrorLogger != nil {
		do(w.ErrorLogger)
	} else {
		w.withLogger(do)
	}
}

func (w *Writer) stats() *writerStats {
	w.once.Do(func() {
		// This field is not nil when the writer was constructed with NewWriter
		// to share the value with the dial function and count dials.
		if w.writerStats == nil {
			w.writerStats = new(writerStats)
		}
	})
	return w.writerStats
}

// Stats returns a snapshot of the writer stats since the last time the method
// was called, or since the writer was created if it is called for the first
// time.
//
// A typical use of this method is to spawn a goroutine that will periodically
// call Stats on a kafka writer and report the metrics to a stats collection
// system.
func (w *Writer) Stats() WriterStats {
	stats := w.stats()
	return WriterStats{
		Dials:        stats.dials.snapshot(),
		Writes:       stats.writes.snapshot(),
		Messages:     stats.messages.snapshot(),
		Bytes:        stats.bytes.snapshot(),
		Errors:       stats.errors.snapshot(),
		DialTime:     stats.dialTime.snapshotDuration(),
		BatchTime:    stats.batchTime.snapshotDuration(),
		WriteTime:    stats.writeTime.snapshotDuration(),
		WaitTime:     stats.waitTime.snapshotDuration(),
		Retries:      stats.retries.snapshot(),
		BatchSize:    stats.batchSize.snapshot(),
		BatchBytes:   stats.batchSizeBytes.snapshot(),
		MaxAttempts:  int64(w.MaxAttempts),
		MaxBatchSize: int64(w.BatchSize),
		BatchTimeout: w.BatchTimeout,
		ReadTimeout:  w.ReadTimeout,
		WriteTimeout: w.WriteTimeout,
		RequiredAcks: int64(w.RequiredAcks),
		Async:        w.Async,
		Topic:        w.Topic,
	}
}
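// For example, a goroutine that reports the snapshot once a minute (an
// illustrative sketch; report is a hypothetical function wired to a metrics
// system):
//
//	go func() {
//		for range time.Tick(time.Minute) {
//			report(w.Stats())
//		}
//	}()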

func (w *Writer) chooseTopic(msg Message) (string, error) {
	// w.Topic and msg.Topic are mutually exclusive, meaning exactly one of
	// them must be set, otherwise we return an error.
	if w.Topic != "" && msg.Topic != "" {
		return "", errors.New("kafka.(*Writer): Topic must not be specified for both Writer and Message")
	} else if w.Topic == "" && msg.Topic == "" {
		return "", errors.New("kafka.(*Writer): Topic must be specified for Writer or Message")
	}

	// now we choose the topic, depending on which one is not empty
	if msg.Topic != "" {
		return msg.Topic, nil
	}

	return w.Topic, nil
}

type batchQueue struct {
	queue []*writeBatch

	// Pointers are used here to make `go vet` happy, and avoid copying mutexes.
	// It may be better to revert these to non-pointers and avoid the copies in
	// a different way.
	mutex *sync.Mutex
	cond  *sync.Cond

	closed bool
}

func (b *batchQueue) Put(batch *writeBatch) bool {
	b.cond.L.Lock()
	defer b.cond.L.Unlock()
	defer b.cond.Broadcast()

	if b.closed {
		return false
	}
	b.queue = append(b.queue, batch)
	return true
}

func (b *batchQueue) Get() *writeBatch {
	b.cond.L.Lock()
	defer b.cond.L.Unlock()

	for len(b.queue) == 0 && !b.closed {
		b.cond.Wait()
	}

	if len(b.queue) == 0 {
		return nil
	}

	batch := b.queue[0]
	b.queue[0] = nil
	b.queue = b.queue[1:]

	return batch
}

func (b *batchQueue) Close() {
	b.cond.L.Lock()
	defer b.cond.L.Unlock()
	defer b.cond.Broadcast()

	b.closed = true
}

func newBatchQueue(initialSize int) batchQueue {
	bq := batchQueue{
		queue: make([]*writeBatch, 0, initialSize),
		mutex: &sync.Mutex{},
		cond:  &sync.Cond{},
	}

	bq.cond.L = bq.mutex

	return bq
}

// partitionWriter is a writer for a topic-partition pair. It maintains
// messaging order across batches of messages.
type partitionWriter struct {
	meta  topicPartition
	queue batchQueue

	mutex     sync.Mutex
	currBatch *writeBatch

	// Reference to the writer that owns this partition writer. Used for the
	// produce logic as well as stat tracking.
	w *Writer
}

func newPartitionWriter(w *Writer, key topicPartition) *partitionWriter {
	writer := &partitionWriter{
		meta:  key,
		queue: newBatchQueue(10),
		w:     w,
	}
	w.spawn(writer.writeBatches)
	return writer
}

func (ptw *partitionWriter) writeBatches() {
	for {
		batch := ptw.queue.Get()

		// The only time we can get nil back is when the queue is closed
		// and empty. If the queue is closed that means the Writer is closed,
		// so once we're here it's time to exit.
		if batch == nil {
			return
		}

		ptw.writeBatch(batch)
	}
}

func (ptw *partitionWriter) writeMessages(msgs []Message, indexes []int32) map[*writeBatch][]int32 {
	ptw.mutex.Lock()
	defer ptw.mutex.Unlock()

	batchSize := ptw.w.batchSize()
	batchBytes := ptw.w.batchBytes()

	var batches map[*writeBatch][]int32
	if !ptw.w.Async {
		batches = make(map[*writeBatch][]int32, 1)
	}

	for _, i := range indexes {
	assignMessage:
		batch := ptw.currBatch
		if batch == nil {
			batch = ptw.newWriteBatch()
			ptw.currBatch = batch
		}
		if !batch.add(msgs[i], batchSize, batchBytes) {
			batch.trigger()
			ptw.queue.Put(batch)
			ptw.currBatch = nil
			goto assignMessage
		}

		if batch.full(batchSize, batchBytes) {
			batch.trigger()
			ptw.queue.Put(batch)
			ptw.currBatch = nil
		}

		if !ptw.w.Async {
			batches[batch] = append(batches[batch], i)
		}
	}
	return batches
}

// ptw.w can be accessed here because this is called with the lock ptw.mutex
// already held.
func (ptw *partitionWriter) newWriteBatch() *writeBatch {
	batch := newWriteBatch(time.Now(), ptw.w.batchTimeout())
	ptw.w.spawn(func() { ptw.awaitBatch(batch) })
	return batch
}

// awaitBatch waits for a batch to either fill up or time out. If the batch is
// full it only stops the timer; if the timer expires it queues the batch for
// writing if needed.
func (ptw *partitionWriter) awaitBatch(batch *writeBatch) {
	select {
	case <-batch.timer.C:
		ptw.mutex.Lock()
		// Detach the batch from the writer if we're still attached, and queue
		// it for writing. Only the current batch can expire, all previous
		// batches were already written to the queue. If writeMessages locks
		// ptw.mutex after the timer fires but before this goroutine can lock
		// ptw.mutex, it will either have filled the batch and enqueued it,
		// which means ptw.currBatch != batch, so we just move on. Otherwise,
		// we detach the batch from the partitionWriter and enqueue it for
		// writing.
		if ptw.currBatch == batch {
			ptw.queue.Put(batch)
			ptw.currBatch = nil
		}
		ptw.mutex.Unlock()
	case <-batch.ready:
		// The batch became full, it was removed from the partitionWriter and
		// its ready channel was closed. We need to stop the timer to avoid
		// having it leak until it expires.
		batch.timer.Stop()
	}
}

func (ptw *partitionWriter) writeBatch(batch *writeBatch) {
	stats := ptw.w.stats()
	stats.batchTime.observe(int64(time.Since(batch.time)))
	stats.batchSize.observe(int64(len(batch.msgs)))
	stats.batchSizeBytes.observe(batch.bytes)

	var res *ProduceResponse
	var err error
	key := ptw.meta
	for attempt, maxAttempts := 0, ptw.w.maxAttempts(); attempt < maxAttempts; attempt++ {
		if attempt != 0 {
			stats.retries.observe(1)
			// TODO: should there be a way to asynchronously cancel this
			// operation?
			//
			// * If all goroutines that added messages to this batch have
			//   stopped waiting for it, should we abort?
			//
			// * If the writer has been closed? It reduces the durability
			//   guarantees to abort, but may be better to avoid long wait
			//   times on close.
			//
			delay := backoff(attempt, 100*time.Millisecond, 1*time.Second)
			ptw.w.withLogger(func(log Logger) {
				log.Printf("backing off %s writing %d messages to %s (partition: %d)", delay, len(batch.msgs), key.topic, key.partition)
			})
			time.Sleep(delay)
		}

		ptw.w.withLogger(func(log Logger) {
			log.Printf("writing %d messages to %s (partition: %d)", len(batch.msgs), key.topic, key.partition)
		})

		start := time.Now()
		res, err = ptw.w.produce(key, batch)

		stats.writes.observe(1)
		stats.messages.observe(int64(len(batch.msgs)))
		stats.bytes.observe(batch.bytes)
		// stats.writeTime used to report the duration of WriteMessages, but the
		// implementation was broken and reporting values in the nanoseconds
		// range. In kafka-go 0.4, we recycled this value to instead report the
		// duration of produce requests, and changed the stats.waitTime value to
		// report the time that kafka has throttled the requests for.
		stats.writeTime.observe(int64(time.Since(start)))

		if res != nil {
			err = res.Error
			stats.waitTime.observe(int64(res.Throttle))
		}

		if err == nil {
			break
		}

		stats.errors.observe(1)

		ptw.w.withErrorLogger(func(log Logger) {
			log.Printf("error writing messages to %s (partition %d): %s", key.topic, key.partition, err)
		})

		if !isTemporary(err) && !isTransientNetworkError(err) {
			break
		}
	}

	if res != nil {
		for i := range batch.msgs {
			m := &batch.msgs[i]
			m.Topic = key.topic
			m.Partition = int(key.partition)
			m.Offset = res.BaseOffset + int64(i)

			if m.Time.IsZero() {
				m.Time = res.LogAppendTime
			}
		}
	}

	if ptw.w.Completion != nil {
		ptw.w.Completion(batch.msgs, err)
	}

	batch.complete(err)
}

func (ptw *partitionWriter) close() {
	ptw.mutex.Lock()
	defer ptw.mutex.Unlock()

	if ptw.currBatch != nil {
		batch := ptw.currBatch
		ptw.queue.Put(batch)
		ptw.currBatch = nil
		batch.trigger()
	}

	ptw.queue.Close()
}

type writeBatch struct {
	time  time.Time
	msgs  []Message
	size  int
	bytes int64
	ready chan struct{}
	done  chan struct{}
	timer *time.Timer
	err   error // result of the batch completion
}

func newWriteBatch(now time.Time, timeout time.Duration) *writeBatch {
	return &writeBatch{
		time:  now,
		ready: make(chan struct{}),
		done:  make(chan struct{}),
		timer: time.NewTimer(timeout),
	}
}

func (b *writeBatch) add(msg Message, maxSize int, maxBytes int64) bool {
	bytes := int64(msg.size())

	if b.size > 0 && (b.bytes+bytes) > maxBytes {
		return false
	}

	if cap(b.msgs) == 0 {
		b.msgs = make([]Message, 0, maxSize)
	}

	b.msgs = append(b.msgs, msg)
	b.size++
	b.bytes += bytes
	return true
}

func (b *writeBatch) full(maxSize int, maxBytes int64) bool {
	return b.size >= maxSize || b.bytes >= maxBytes
}

func (b *writeBatch) trigger() {
	close(b.ready)
}

func (b *writeBatch) complete(err error) {
	b.err = err
	close(b.done)
}

type writerRecords struct {
	msgs   []Message
	index  int
	record Record
	key    bytesReadCloser
	value  bytesReadCloser
}

func (r *writerRecords) ReadRecord() (*Record, error) {
	if r.index >= 0 && r.index < len(r.msgs) {
		m := &r.msgs[r.index]
		r.index++
		r.record = Record{
			Time:    m.Time,
			Headers: m.Headers,
		}
		if m.Key != nil {
			r.key.Reset(m.Key)
			r.record.Key = &r.key
		}
		if m.Value != nil {
			r.value.Reset(m.Value)
			r.record.Value = &r.value
		}
		return &r.record, nil
	}
	return nil, io.EOF
}

type bytesReadCloser struct{ bytes.Reader }

func (*bytesReadCloser) Close() error { return nil }

// A cache of []int values passed to balancers of writers, used to amortize the
// heap allocation of the partition index lists.
//
// With hindsight, the use of `...int` to pass the partition list to Balancers
// was not the best design choice: kafka partition numbers are monotonically
// increasing, we could have simply passed the number of partitions instead.
// If we ever revisit this API, we can hopefully remove this cache.
var partitionsCache atomic.Value

func loadCachedPartitions(numPartitions int) []int {
	partitions, ok := partitionsCache.Load().([]int)
	if ok && len(partitions) >= numPartitions {
		return partitions[:numPartitions]
	}

	const alignment = 128
	n := ((numPartitions / alignment) + 1) * alignment

	partitions = make([]int, n)
	for i := range partitions {
		partitions[i] = i
	}

	partitionsCache.Store(partitions)
	return partitions[:numPartitions]
}
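// For example, with the 128-slot alignment above, a topic with 300 partitions
// allocates a cached slice of ((300/128)+1)*128 = 384 entries, so later
// lookups for any topic with up to 384 partitions reuse the same backing
// slice (a worked example, not part of the original source).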