github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/ccl/changefeedccl/sink.go

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Licensed as a CockroachDB Enterprise file under the Cockroach Community
     4  // License (the "License"); you may not use this file except in compliance with
     5  // the License. You may obtain a copy of the License at
     6  //
     7  //     https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt
     8  
     9  package changefeedccl
    10  
    11  import (
    12  	"context"
    13  	"crypto/tls"
    14  	"crypto/x509"
    15  	gosql "database/sql"
    16  	"encoding/base64"
    17  	"fmt"
    18  	"hash"
    19  	"hash/fnv"
    20  	"net/url"
    21  	"strconv"
    22  	"strings"
    23  	"sync"
    24  	"time"
    25  
    26  	"github.com/Shopify/sarama"
    27  	"github.com/cockroachdb/cockroach/pkg/base"
    28  	"github.com/cockroachdb/cockroach/pkg/ccl/changefeedccl/changefeedbase"
    29  	"github.com/cockroachdb/cockroach/pkg/jobs/jobspb"
    30  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    31  	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
    32  	"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
    33  	"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror"
    34  	"github.com/cockroachdb/cockroach/pkg/sql/sem/builtins"
    35  	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
    36  	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
    37  	"github.com/cockroachdb/cockroach/pkg/storage/cloud"
    38  	"github.com/cockroachdb/cockroach/pkg/util/bufalloc"
    39  	"github.com/cockroachdb/cockroach/pkg/util/hlc"
    40  	"github.com/cockroachdb/cockroach/pkg/util/humanizeutil"
    41  	"github.com/cockroachdb/cockroach/pkg/util/log"
    42  	"github.com/cockroachdb/cockroach/pkg/util/syncutil"
    43  	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
    44  	"github.com/cockroachdb/errors"
    45  	"github.com/cockroachdb/logtags"
    46  )
    47  
    48  // Sink is an abstraction for anything that a changefeed may emit into.
    49  type Sink interface {
    50  	// EmitRow enqueues a row message for asynchronous delivery on the sink. An
    51  	// error may be returned if a previously enqueued message has failed.
    52  	EmitRow(
    53  		ctx context.Context,
    54  		table *sqlbase.TableDescriptor,
    55  		key, value []byte,
    56  		updated hlc.Timestamp,
    57  	) error
    58  	// EmitResolvedTimestamp enqueues a resolved timestamp message for
    59  	// asynchronous delivery on every topic that has been seen by EmitRow. An
    60  	// error may be returned if a previously enqueued message has failed.
    61  	EmitResolvedTimestamp(ctx context.Context, encoder Encoder, resolved hlc.Timestamp) error
    62  	// Flush blocks until every message enqueued by EmitRow and
    63  	// EmitResolvedTimestamp has been acknowledged by the sink. If an error is
    64  	// returned, no guarantees are given about which messages have been
    65  	// delivered or not delivered.
    66  	Flush(ctx context.Context) error
    67  	// Close does not guarantee delivery of outstanding messages.
    68  	Close() error
    69  }
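
// A rough usage sketch (editor's illustration, not from the original file): a
// caller that already has a Sink, an Encoder, a table descriptor, encoded
// key/value bytes, and HLC timestamps in scope might drive the interface as:
//
//	if err := sink.EmitRow(ctx, tableDesc, key, value, updatedTS); err != nil {
//		return err // may surface a failure from an earlier enqueue
//	}
//	if err := sink.EmitResolvedTimestamp(ctx, encoder, resolvedTS); err != nil {
//		return err
//	}
//	if err := sink.Flush(ctx); err != nil {
//		return err // unknown which enqueued messages were actually delivered
//	}
//	_ = sink.Close() // Close does not wait for outstanding messages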
    70  
    71  func getSink(
    72  	ctx context.Context,
    73  	sinkURI string,
    74  	nodeID roachpb.NodeID,
    75  	opts map[string]string,
    76  	targets jobspb.ChangefeedTargets,
    77  	settings *cluster.Settings,
    78  	timestampOracle timestampLowerBoundOracle,
    79  	makeExternalStorageFromURI cloud.ExternalStorageFromURIFactory,
    80  ) (Sink, error) {
    81  	u, err := url.Parse(sinkURI)
    82  	if err != nil {
    83  		return nil, err
    84  	}
    85  	q := u.Query()
    86  
    87  	// Use a function here to delay creation of the sink until after we've done
    88  	// all the parameter verification.
    89  	var makeSink func() (Sink, error)
    90  	switch {
    91  	case u.Scheme == changefeedbase.SinkSchemeBuffer:
    92  		makeSink = func() (Sink, error) { return &bufferSink{}, nil }
    93  	case u.Scheme == changefeedbase.SinkSchemeKafka:
    94  		var cfg kafkaSinkConfig
    95  		cfg.kafkaTopicPrefix = q.Get(changefeedbase.SinkParamTopicPrefix)
    96  		q.Del(changefeedbase.SinkParamTopicPrefix)
    97  		if schemaTopic := q.Get(changefeedbase.SinkParamSchemaTopic); schemaTopic != `` {
    98  			return nil, errors.Errorf(`%s is not yet supported`, changefeedbase.SinkParamSchemaTopic)
    99  		}
   100  		q.Del(changefeedbase.SinkParamSchemaTopic)
   101  		if tlsBool := q.Get(changefeedbase.SinkParamTLSEnabled); tlsBool != `` {
   102  			var err error
   103  			if cfg.tlsEnabled, err = strconv.ParseBool(tlsBool); err != nil {
   104  				return nil, errors.Errorf(`param %s must be a bool: %s`, changefeedbase.SinkParamTLSEnabled, err)
   105  			}
   106  		}
   107  		q.Del(changefeedbase.SinkParamTLSEnabled)
   108  		if caCertHex := q.Get(changefeedbase.SinkParamCACert); caCertHex != `` {
   109  			// TODO(dan): There's a straightforward and unambiguous transformation
   110  			// between the base 64 encoding defined in RFC 4648 and the URL variant
   111  			// defined in the same RFC: simply replace all `+` with `-` and `/` with
   112  			// `_`. Consider always doing this for the user and accepting either
   113  			// variant.
   114  			if cfg.caCert, err = base64.StdEncoding.DecodeString(caCertHex); err != nil {
   115  				return nil, errors.Errorf(`param %s must be base 64 encoded: %s`, changefeedbase.SinkParamCACert, err)
   116  			}
   117  		}
   118  		q.Del(changefeedbase.SinkParamCACert)
   119  		if clientCertHex := q.Get(changefeedbase.SinkParamClientCert); clientCertHex != `` {
   120  			if cfg.clientCert, err = base64.StdEncoding.DecodeString(clientCertHex); err != nil {
   121  				return nil, errors.Errorf(`param %s must be base 64 encoded: %s`, changefeedbase.SinkParamClientCert, err)
   122  			}
   123  		}
   124  		q.Del(changefeedbase.SinkParamClientCert)
   125  		if clientKeyHex := q.Get(changefeedbase.SinkParamClientKey); clientKeyHex != `` {
   126  			if cfg.clientKey, err = base64.StdEncoding.DecodeString(clientKeyHex); err != nil {
   127  				return nil, errors.Errorf(`param %s must be base 64 encoded: %s`, changefeedbase.SinkParamClientKey, err)
   128  			}
   129  		}
   130  		q.Del(changefeedbase.SinkParamClientKey)
   131  
   132  		saslParam := q.Get(changefeedbase.SinkParamSASLEnabled)
   133  		q.Del(changefeedbase.SinkParamSASLEnabled)
   134  		if saslParam != `` {
   135  			b, err := strconv.ParseBool(saslParam)
   136  			if err != nil {
   137  				return nil, errors.Wrapf(err, `param %s must be a bool`, changefeedbase.SinkParamSASLEnabled)
   138  			}
   139  			cfg.saslEnabled = b
   140  		}
   141  		handshakeParam := q.Get(changefeedbase.SinkParamSASLHandshake)
   142  		q.Del(changefeedbase.SinkParamSASLHandshake)
   143  		if handshakeParam == `` {
   144  			cfg.saslHandshake = true
   145  		} else {
   146  			if !cfg.saslEnabled {
   147  				return nil, errors.Errorf(`%s must be enabled to configure SASL handshake behavior`, changefeedbase.SinkParamSASLEnabled)
   148  			}
   149  			b, err := strconv.ParseBool(handshakeParam)
   150  			if err != nil {
   151  				return nil, errors.Wrapf(err, `param %s must be a bool`, changefeedbase.SinkParamSASLHandshake)
   152  			}
   153  			cfg.saslHandshake = b
   154  		}
   155  		cfg.saslUser = q.Get(changefeedbase.SinkParamSASLUser)
   156  		q.Del(changefeedbase.SinkParamSASLUser)
   157  		cfg.saslPassword = q.Get(changefeedbase.SinkParamSASLPassword)
   158  		q.Del(changefeedbase.SinkParamSASLPassword)
   159  		if cfg.saslEnabled {
   160  			if cfg.saslUser == `` {
   161  				return nil, errors.Errorf(`%s must be provided when SASL is enabled`, changefeedbase.SinkParamSASLUser)
   162  			}
   163  			if cfg.saslPassword == `` {
   164  				return nil, errors.Errorf(`%s must be provided when SASL is enabled`, changefeedbase.SinkParamSASLPassword)
   165  			}
   166  		} else {
   167  			if cfg.saslUser != `` {
   168  				return nil, errors.Errorf(`%s must be enabled if a SASL user is provided`, changefeedbase.SinkParamSASLEnabled)
   169  			}
   170  			if cfg.saslPassword != `` {
   171  				return nil, errors.Errorf(`%s must be enabled if a SASL password is provided`, changefeedbase.SinkParamSASLEnabled)
   172  			}
   173  		}
   174  
   175  		makeSink = func() (Sink, error) {
   176  			return makeKafkaSink(cfg, u.Host, targets)
   177  		}
   178  	case isCloudStorageSink(u):
   179  		fileSizeParam := q.Get(changefeedbase.SinkParamFileSize)
   180  		q.Del(changefeedbase.SinkParamFileSize)
   181  		var fileSize int64 = 16 << 20 // 16MB
   182  		if fileSizeParam != `` {
   183  			if fileSize, err = humanizeutil.ParseBytes(fileSizeParam); err != nil {
   184  				return nil, pgerror.Wrapf(err, pgcode.Syntax, `parsing %s`, fileSizeParam)
   185  			}
   186  		}
   187  		u.Scheme = strings.TrimPrefix(u.Scheme, `experimental-`)
   188  		// Transfer "ownership" of validating all remaining query parameters to
   189  		// ExternalStorage.
   190  		u.RawQuery = q.Encode()
   191  		q = url.Values{}
   192  		makeSink = func() (Sink, error) {
   193  			return makeCloudStorageSink(
   194  				ctx, u.String(), nodeID, fileSize, settings,
   195  				opts, timestampOracle, makeExternalStorageFromURI,
   196  			)
   197  		}
   198  	case u.Scheme == changefeedbase.SinkSchemeExperimentalSQL:
   199  		// Swap the changefeed prefix for the sql connection one that sqlSink
   200  		// expects.
   201  		u.Scheme = `postgres`
   202  		// TODO(dan): Make tableName configurable or based on the job ID or
   203  		// something.
   204  		tableName := `sqlsink`
   205  		makeSink = func() (Sink, error) {
   206  			return makeSQLSink(u.String(), tableName, targets)
   207  		}
   208  		// Remove parameters we know about for the unknown parameter check.
   209  		q.Del(`sslcert`)
   210  		q.Del(`sslkey`)
   211  		q.Del(`sslmode`)
   212  		q.Del(`sslrootcert`)
   213  	default:
   214  		return nil, errors.Errorf(`unsupported sink: %s`, u.Scheme)
   215  	}
   216  
   217  	for k := range q {
   218  		return nil, errors.Errorf(`unknown sink query parameter: %s`, k)
   219  	}
   220  
   221  	s, err := makeSink()
   222  	if err != nil {
   223  		return nil, err
   224  	}
   225  	return s, nil
   226  }
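
// For orientation, a few illustrative sink URIs (editor's sketch; the concrete
// parameter spellings live behind the changefeedbase.SinkParam* constants and
// are assumed here to be topic_prefix, tls_enabled, ca_cert, sasl_enabled,
// sasl_user, sasl_password, and file_size):
//
//	kafka://broker:9092?topic_prefix=crdb_&tls_enabled=true&ca_cert=<base64 PEM>
//	kafka://broker:9092?sasl_enabled=true&sasl_user=feed&sasl_password=hunter2
//	experimental-s3://bucket/changefeed?file_size=32MB   (plus ExternalStorage auth params)
//	experimental-sql://root@localhost:26257/defaultdb?sslmode=disable   (sqlSink, testing only)
//
// Any query parameter still present after the scheme-specific parsing above is
// rejected by the `unknown sink query parameter` check.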
   227  
   228  // errorWrapperSink delegates to another sink and marks all returned errors as
   229  // retryable. During changefeed setup, we use the sink once without this to
   230  // verify configuration, but in the steady state, no sink error should be
   231  // terminal.
   232  type errorWrapperSink struct {
   233  	wrapped Sink
   234  }
   235  
   236  func (s errorWrapperSink) EmitRow(
   237  	ctx context.Context, table *sqlbase.TableDescriptor, key, value []byte, updated hlc.Timestamp,
   238  ) error {
   239  	if err := s.wrapped.EmitRow(ctx, table, key, value, updated); err != nil {
   240  		return MarkRetryableError(err)
   241  	}
   242  	return nil
   243  }
   244  
   245  func (s errorWrapperSink) EmitResolvedTimestamp(
   246  	ctx context.Context, encoder Encoder, resolved hlc.Timestamp,
   247  ) error {
   248  	if err := s.wrapped.EmitResolvedTimestamp(ctx, encoder, resolved); err != nil {
   249  		return MarkRetryableError(err)
   250  	}
   251  	return nil
   252  }
   253  
   254  func (s errorWrapperSink) Flush(ctx context.Context) error {
   255  	if err := s.wrapped.Flush(ctx); err != nil {
   256  		return MarkRetryableError(err)
   257  	}
   258  	return nil
   259  }
   260  
   261  func (s errorWrapperSink) Close() error {
   262  	if err := s.wrapped.Close(); err != nil {
   263  		return MarkRetryableError(err)
   264  	}
   265  	return nil
   266  }
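
// A sketch of how the wrapper slots in (editor's illustration): the changefeed
// exercises the freshly constructed sink once without the wrapper so that
// configuration problems surface as terminal errors, and only then wraps it so
// steady-state failures are retried:
//
//	sink, err := getSink(ctx, sinkURI, nodeID, opts, targets, settings, oracle, makeExternalStorage)
//	if err != nil {
//		return err
//	}
//	var wrapped Sink = errorWrapperSink{wrapped: sink}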
   267  
   268  type kafkaLogAdapter struct {
   269  	ctx context.Context
   270  }
   271  
   272  var _ sarama.StdLogger = (*kafkaLogAdapter)(nil)
   273  
   274  func (l *kafkaLogAdapter) Print(v ...interface{}) {
   275  	log.InfofDepth(l.ctx, 1, "", v...)
   276  }
   277  func (l *kafkaLogAdapter) Printf(format string, v ...interface{}) {
   278  	log.InfofDepth(l.ctx, 1, format, v...)
   279  }
   280  func (l *kafkaLogAdapter) Println(v ...interface{}) {
   281  	log.InfofDepth(l.ctx, 1, "", v...)
   282  }
   283  
   284  func init() {
   285  	// We'd much prefer to make one of these per sink, so we can use the real
   286  	// context, but quite unfortunately, sarama only has a global logger hook.
   287  	ctx := context.Background()
   288  	ctx = logtags.AddTag(ctx, "kafka-producer", nil)
   289  	sarama.Logger = &kafkaLogAdapter{ctx: ctx}
   290  }
   291  
   292  type kafkaSinkConfig struct {
   293  	kafkaTopicPrefix string
   294  	tlsEnabled       bool
   295  	caCert           []byte
   296  	clientCert       []byte
   297  	clientKey        []byte
   298  	saslEnabled      bool
   299  	saslHandshake    bool
   300  	saslUser         string
   301  	saslPassword     string
   302  }
   303  
   304  // kafkaSink emits to Kafka asynchronously. It is not concurrency-safe; all
   305  // calls to EmitRow, EmitResolvedTimestamp, and Flush should be from the same goroutine.
   306  type kafkaSink struct {
   307  	cfg      kafkaSinkConfig
   308  	client   sarama.Client
   309  	producer sarama.AsyncProducer
   310  	topics   map[string]struct{}
   311  
   312  	lastMetadataRefresh time.Time
   313  
   314  	stopWorkerCh chan struct{}
   315  	worker       sync.WaitGroup
   316  	scratch      bufalloc.ByteAllocator
   317  
   318  	// Only synchronized between the client goroutine and the worker goroutine.
   319  	mu struct {
   320  		syncutil.Mutex
   321  		inflight int64
   322  		flushErr error
   323  		flushCh  chan struct{}
   324  	}
   325  }
   326  
   327  func makeKafkaSink(
   328  	cfg kafkaSinkConfig, bootstrapServers string, targets jobspb.ChangefeedTargets,
   329  ) (Sink, error) {
   330  	sink := &kafkaSink{cfg: cfg}
   331  	sink.topics = make(map[string]struct{})
   332  	for _, t := range targets {
   333  		sink.topics[cfg.kafkaTopicPrefix+SQLNameToKafkaName(t.StatementTimeName)] = struct{}{}
   334  	}
   335  
   336  	config := sarama.NewConfig()
   337  	config.ClientID = `CockroachDB`
   338  	config.Producer.Return.Successes = true
   339  	config.Producer.Partitioner = newChangefeedPartitioner
   340  
   341  	if cfg.caCert != nil {
   342  		if !cfg.tlsEnabled {
   343  			return nil, errors.Errorf(`%s requires %s=true`, changefeedbase.SinkParamCACert, changefeedbase.SinkParamTLSEnabled)
   344  		}
   345  		caCertPool := x509.NewCertPool()
   346  		caCertPool.AppendCertsFromPEM(cfg.caCert)
   347  		config.Net.TLS.Config = &tls.Config{
   348  			RootCAs: caCertPool,
   349  		}
   350  		config.Net.TLS.Enable = true
   351  	} else if cfg.tlsEnabled {
   352  		config.Net.TLS.Enable = true
   353  	}
   354  
   355  	if cfg.clientCert != nil {
   356  		if !cfg.tlsEnabled {
   357  			return nil, errors.Errorf(`%s requires %s=true`, changefeedbase.SinkParamClientCert, changefeedbase.SinkParamTLSEnabled)
   358  		}
   359  		if cfg.clientKey == nil {
   360  			return nil, errors.Errorf(`%s requires %s to be set`, changefeedbase.SinkParamClientCert, changefeedbase.SinkParamClientKey)
   361  		}
   362  		cert, err := tls.X509KeyPair(cfg.clientCert, cfg.clientKey)
   363  		if err != nil {
   364  			return nil, errors.Errorf(`invalid client certificate data provided: %s`, err)
   365  		}
   366  		if config.Net.TLS.Config == nil {
   367  			config.Net.TLS.Config = &tls.Config{}
   368  		}
   369  		config.Net.TLS.Config.Certificates = []tls.Certificate{cert}
   370  	} else if cfg.clientKey != nil {
   371  		return nil, errors.Errorf(`%s requires %s to be set`, changefeedbase.SinkParamClientKey, changefeedbase.SinkParamClientCert)
   372  	}
   373  
   374  	if cfg.saslEnabled {
   375  		config.Net.SASL.Enable = true
   376  		config.Net.SASL.Handshake = cfg.saslHandshake
   377  		config.Net.SASL.User = cfg.saslUser
   378  		config.Net.SASL.Password = cfg.saslPassword
   379  	}
   380  
   381  	// When we emit messages to sarama, they're placed in a queue (as any
   382  	// reasonable kafka producer client does). When our sink's Flush is called, we
   383  	// have to wait for all buffered and inflight requests to be sent and then
   384  	// acknowledged. Quite unfortunately, we have no way to hint to the producer
   385  	// that it should immediately send out whatever is buffered. This
   386  	// configuration can have a dramatic impact on how quickly this happens
   387  	// naturally (and some configurations will block forever!).
   388  	//
   389  	// We can configure the producer to send out its batches based on number of
   390  	// messages and/or total buffered message size and/or time. If none of them
   391  	// are set, it uses some defaults, but if any of the three are set, it does
   392  	// no defaulting. Which means that if `Flush.Messages` is set to 10 and
   393  	// nothing else is set, then 9/10 times `Flush` will block forever. We can
   394  	// work around this by also setting `Flush.Frequency` but a cleaner way is
   395  	// to set `Flush.Messages` to 1. In the steady state, this sends a request
   396  	// with some messages, buffers any messages that come in while it is in
   397  	// flight, then sends those out.
   398  	config.Producer.Flush.Messages = 1
   399  
   400  	// This works around what seems to be a bug in sarama where it isn't
   401  	// computing the right value to compare against `Producer.MaxMessageBytes`
   402  	// and the server sends it back with a "Message was too large, server
   403  	// rejected it to avoid allocation" error. The other flush tunings are
   404  	// hints, but this one is a hard limit, so it's useful here as a workaround.
   405  	//
   406  	// This workaround should probably be something like setting
   407  	// `Producer.MaxMessageBytes` to 90% of its value for some headroom, but
   408  	// this workaround is the one that's been running in roachtests and I'd want
   409  	// to test this one more before changing it.
   410  	config.Producer.Flush.MaxMessages = 1000
   411  
   412  	// config.Producer.Flush.Messages is set to 1 so we don't need this, but
   413  	// sarama prints scary things to the logs if we don't.
   414  	config.Producer.Flush.Frequency = time.Hour
   415  
   416  	var err error
   417  	sink.client, err = sarama.NewClient(strings.Split(bootstrapServers, `,`), config)
   418  	if err != nil {
   419  		err = pgerror.Wrapf(err, pgcode.CannotConnectNow,
   420  			`connecting to kafka: %s`, bootstrapServers)
   421  		return nil, err
   422  	}
   423  	sink.producer, err = sarama.NewAsyncProducerFromClient(sink.client)
   424  	if err != nil {
   425  		err = pgerror.Wrapf(err, pgcode.CannotConnectNow,
   426  			`connecting to kafka: %s`, bootstrapServers)
   427  		return nil, err
   428  	}
   429  
   430  	sink.start()
   431  	return sink, nil
   432  }
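
// To summarize the validation above (editor's note): caCert and clientCert
// both require tlsEnabled, clientCert and clientKey must be supplied together,
// and tlsEnabled with no certificates enables TLS against the system root CA
// pool. A direct construction, assuming PEM bytes that getSink has already
// base64-decoded and the changefeed's targets in scope, might look like:
//
//	cfg := kafkaSinkConfig{
//		kafkaTopicPrefix: "crdb_",
//		tlsEnabled:       true,
//		caCert:           caCertPEM,
//	}
//	sink, err := makeKafkaSink(cfg, "broker:9092", targets)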
   433  
   434  func (s *kafkaSink) start() {
   435  	s.stopWorkerCh = make(chan struct{})
   436  	s.worker.Add(1)
   437  	go s.workerLoop()
   438  }
   439  
   440  // Close implements the Sink interface.
   441  func (s *kafkaSink) Close() error {
   442  	close(s.stopWorkerCh)
   443  	s.worker.Wait()
   444  
   445  	// If we're shutting down, we don't care what happens to the outstanding
   446  	// messages, so ignore this error.
   447  	_ = s.producer.Close()
   448  	// s.client is only nil in tests.
   449  	if s.client != nil {
   450  		return s.client.Close()
   451  	}
   452  	return nil
   453  }
   454  
   455  // EmitRow implements the Sink interface.
   456  func (s *kafkaSink) EmitRow(
   457  	ctx context.Context, table *sqlbase.TableDescriptor, key, value []byte, _ hlc.Timestamp,
   458  ) error {
   459  	topic := s.cfg.kafkaTopicPrefix + SQLNameToKafkaName(table.Name)
   460  	if _, ok := s.topics[topic]; !ok {
   461  		return errors.Errorf(`cannot emit to undeclared topic: %s`, topic)
   462  	}
   463  
   464  	msg := &sarama.ProducerMessage{
   465  		Topic: topic,
   466  		Key:   sarama.ByteEncoder(key),
   467  		Value: sarama.ByteEncoder(value),
   468  	}
   469  	return s.emitMessage(ctx, msg)
   470  }
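
// For example (editor's illustration): with kafkaTopicPrefix `crdb_` and a
// changefeed targeting a table named `orders`, rows land on
//
//	topic := "crdb_" + SQLNameToKafkaName("orders") // "crdb_orders" for names that are already Kafka-safe
//
// and emitting for a table that was not declared in the changefeed's targets
// is rejected rather than silently creating a new topic.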
   471  
   472  // EmitResolvedTimestamp implements the Sink interface.
   473  func (s *kafkaSink) EmitResolvedTimestamp(
   474  	ctx context.Context, encoder Encoder, resolved hlc.Timestamp,
   475  ) error {
   476  	// Periodically ping sarama to refresh its metadata. This means talking to
   477  	// zookeeper, so it shouldn't be done too often, but beyond that this
   478  	// constant was picked pretty arbitrarily.
   479  	//
   480  	// TODO(dan): Add a test for this. We can't right now (2018-11-13) because
   481  	// we'd need to bump sarama, but that's a bad idea while we're still
   482  	// actively working on stability. At the same time, revisit this tuning.
   483  	const metadataRefreshMinDuration = time.Minute
   484  	if timeutil.Since(s.lastMetadataRefresh) > metadataRefreshMinDuration {
   485  		topics := make([]string, 0, len(s.topics))
   486  		for topic := range s.topics {
   487  			topics = append(topics, topic)
   488  		}
   489  		if err := s.client.RefreshMetadata(topics...); err != nil {
   490  			return err
   491  		}
   492  		s.lastMetadataRefresh = timeutil.Now()
   493  	}
   494  
   495  	for topic := range s.topics {
   496  		payload, err := encoder.EncodeResolvedTimestamp(ctx, topic, resolved)
   497  		if err != nil {
   498  			return err
   499  		}
   500  		s.scratch, payload = s.scratch.Copy(payload, 0 /* extraCap */)
   501  
   502  		// sarama caches this, which is why we have to periodically refresh the
   503  		// metadata above. Staleness here does not impact correctness. Some new
   504  		// partitions will miss this resolved timestamp, but they'll eventually
   505  		// be picked up and get later ones.
   506  		partitions, err := s.client.Partitions(topic)
   507  		if err != nil {
   508  			return err
   509  		}
   510  		for _, partition := range partitions {
   511  			msg := &sarama.ProducerMessage{
   512  				Topic:     topic,
   513  				Partition: partition,
   514  				Key:       nil,
   515  				Value:     sarama.ByteEncoder(payload),
   516  			}
   517  			if err := s.emitMessage(ctx, msg); err != nil {
   518  				return err
   519  			}
   520  		}
   521  	}
   522  	return nil
   523  }
   524  
   525  // Flush implements the Sink interface.
   526  func (s *kafkaSink) Flush(ctx context.Context) error {
   527  	flushCh := make(chan struct{}, 1)
   528  
   529  	s.mu.Lock()
   530  	inflight := s.mu.inflight
   531  	flushErr := s.mu.flushErr
   532  	s.mu.flushErr = nil
   533  	immediateFlush := inflight == 0 || flushErr != nil
   534  	if !immediateFlush {
   535  		s.mu.flushCh = flushCh
   536  	}
   537  	s.mu.Unlock()
   538  
   539  	if immediateFlush {
   540  		return flushErr
   541  	}
   542  
   543  	if log.V(1) {
   544  		log.Infof(ctx, "flush waiting for %d inflight messages", inflight)
   545  	}
   546  	select {
   547  	case <-ctx.Done():
   548  		return ctx.Err()
   549  	case <-flushCh:
   550  		s.mu.Lock()
   551  		flushErr := s.mu.flushErr
   552  		s.mu.flushErr = nil
   553  		s.mu.Unlock()
   554  		return flushErr
   555  	}
   556  }
   557  
   558  func (s *kafkaSink) emitMessage(ctx context.Context, msg *sarama.ProducerMessage) error {
   559  	s.mu.Lock()
   560  	s.mu.inflight++
   561  	inflight := s.mu.inflight
   562  	s.mu.Unlock()
   563  
   564  	select {
   565  	case <-ctx.Done():
   566  		return ctx.Err()
   567  	case s.producer.Input() <- msg:
   568  	}
   569  
   570  	if log.V(2) {
   571  		log.Infof(ctx, "emitted %d inflight records to kafka", inflight)
   572  	}
   573  	return nil
   574  }
   575  
   576  func (s *kafkaSink) workerLoop() {
   577  	defer s.worker.Done()
   578  
   579  	for {
   580  		select {
   581  		case <-s.stopWorkerCh:
   582  			return
   583  		case <-s.producer.Successes():
   584  		case err := <-s.producer.Errors():
   585  			s.mu.Lock()
   586  			if s.mu.flushErr == nil {
   587  				s.mu.flushErr = err
   588  			}
   589  			s.mu.Unlock()
   590  		}
   591  
   592  		s.mu.Lock()
   593  		s.mu.inflight--
   594  		if s.mu.inflight == 0 && s.mu.flushCh != nil {
   595  			s.mu.flushCh <- struct{}{}
   596  			s.mu.flushCh = nil
   597  		}
   598  		s.mu.Unlock()
   599  	}
   600  }
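
// The inflight accounting between these goroutines works out roughly like this
// (editor's sketch):
//
//	s.EmitRow(...) // client goroutine: inflight 0 -> 1, message handed to the producer
//	s.EmitRow(...) // inflight 1 -> 2
//	s.Flush(ctx)   // inflight != 0, so Flush installs mu.flushCh and blocks
//	               // worker goroutine: two acks arrive, inflight 2 -> 1 -> 0,
//	               // flushCh is signaled, and Flush returns mu.flushErr (nil on success)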
   601  
   602  type changefeedPartitioner struct {
   603  	hash sarama.Partitioner
   604  }
   605  
   606  var _ sarama.Partitioner = &changefeedPartitioner{}
   607  var _ sarama.PartitionerConstructor = newChangefeedPartitioner
   608  
   609  func newChangefeedPartitioner(topic string) sarama.Partitioner {
   610  	return &changefeedPartitioner{
   611  		hash: sarama.NewHashPartitioner(topic),
   612  	}
   613  }
   614  
   615  func (p *changefeedPartitioner) RequiresConsistency() bool { return true }
   616  func (p *changefeedPartitioner) Partition(
   617  	message *sarama.ProducerMessage, numPartitions int32,
   618  ) (int32, error) {
   619  	if message.Key == nil {
   620  		return message.Partition, nil
   621  	}
   622  	return p.hash.Partition(message, numPartitions)
   623  }
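
// In other words (editor's note): row messages always carry a key and are
// spread by sarama's hash partitioner, while the resolved-timestamp messages
// built in EmitResolvedTimestamp have a nil key and an explicit Partition, so
// they keep that preassigned partition and reach every partition of the topic:
//
//	rowMsg := &sarama.ProducerMessage{Topic: t, Key: sarama.ByteEncoder(k)} // -> hash of k
//	tsMsg := &sarama.ProducerMessage{Topic: t, Partition: 2, Key: nil}      // -> partition 2, as assigned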
   624  
   625  const (
   626  	sqlSinkCreateTableStmt = `CREATE TABLE IF NOT EXISTS "%s" (
   627  		topic STRING,
   628  		partition INT,
   629  		message_id INT,
   630  		key BYTES, value BYTES,
   631  		resolved BYTES,
   632  		PRIMARY KEY (topic, partition, message_id)
   633  	)`
   634  	sqlSinkEmitStmt = `INSERT INTO "%s" (topic, partition, message_id, key, value, resolved)`
   635  	sqlSinkEmitCols = 6
   636  	// A small amount of batching, to loosely mirror how kafkaSink works.
   637  	sqlSinkRowBatchSize = 3
   638  	// Since sqlSink is only used for testing, hardcode the number of
   639  	// partitions to something small but greater than 1.
   640  	sqlSinkNumPartitions = 3
   641  )
   642  
   643  // sqlSink mirrors the semantics offered by kafkaSink as closely as possible,
   644  // but writes to a SQL table (presumably in CockroachDB). Currently only for
   645  // testing.
   646  //
   647  // Each emitted row or resolved timestamp is stored as a row in the table. Each
   648  // table gets 3 partitions. Similar to kafkaSink, the order between two emits is
   649  // only preserved if they are emitted by the same node and to the same
   650  // partition.
   651  type sqlSink struct {
   652  	db *gosql.DB
   653  
   654  	tableName string
   655  	topics    map[string]struct{}
   656  	hasher    hash.Hash32
   657  
   658  	rowBuf  []interface{}
   659  	scratch bufalloc.ByteAllocator
   660  }
   661  
   662  func makeSQLSink(uri, tableName string, targets jobspb.ChangefeedTargets) (*sqlSink, error) {
   663  	if u, err := url.Parse(uri); err != nil {
   664  		return nil, err
   665  	} else if u.Path == `` {
   666  		return nil, errors.Errorf(`must specify database`)
   667  	}
   668  	db, err := gosql.Open(`postgres`, uri)
   669  	if err != nil {
   670  		return nil, err
   671  	}
   672  	if _, err := db.Exec(fmt.Sprintf(sqlSinkCreateTableStmt, tableName)); err != nil {
   673  		db.Close()
   674  		return nil, err
   675  	}
   676  
   677  	s := &sqlSink{
   678  		db:        db,
   679  		tableName: tableName,
   680  		topics:    make(map[string]struct{}),
   681  		hasher:    fnv.New32a(),
   682  	}
   683  	for _, t := range targets {
   684  		s.topics[t.StatementTimeName] = struct{}{}
   685  	}
   686  	return s, nil
   687  }
   688  
   689  // EmitRow implements the Sink interface.
   690  func (s *sqlSink) EmitRow(
   691  	ctx context.Context, table *sqlbase.TableDescriptor, key, value []byte, _ hlc.Timestamp,
   692  ) error {
   693  	topic := table.Name
   694  	if _, ok := s.topics[topic]; !ok {
   695  		return errors.Errorf(`cannot emit to undeclared topic: %s`, topic)
   696  	}
   697  
   698  	// Hashing logic copied from sarama.HashPartitioner.
   699  	s.hasher.Reset()
   700  	if _, err := s.hasher.Write(key); err != nil {
   701  		return err
   702  	}
   703  	partition := int32(s.hasher.Sum32()) % sqlSinkNumPartitions
   704  	if partition < 0 {
   705  		partition = -partition
   706  	}
   707  
   708  	var noResolved []byte
   709  	return s.emit(ctx, topic, partition, key, value, noResolved)
   710  }
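
// The sign fix above matters because Sum32 returns a uint32 that can become
// negative when converted to int32 (editor's example):
//
//	h := s.hasher.Sum32() // say 0xFFFFFFFE
//	int32(h)              // -2 after the conversion
//	int32(h) % 3          // -2: Go's % keeps the dividend's sign
//	-(int32(h) % 3)       //  2: the final partition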
   711  
   712  // EmitResolvedTimestamp implements the Sink interface.
   713  func (s *sqlSink) EmitResolvedTimestamp(
   714  	ctx context.Context, encoder Encoder, resolved hlc.Timestamp,
   715  ) error {
   716  	var noKey, noValue []byte
   717  	for topic := range s.topics {
   718  		payload, err := encoder.EncodeResolvedTimestamp(ctx, topic, resolved)
   719  		if err != nil {
   720  			return err
   721  		}
   722  		s.scratch, payload = s.scratch.Copy(payload, 0 /* extraCap */)
   723  		for partition := int32(0); partition < sqlSinkNumPartitions; partition++ {
   724  			if err := s.emit(ctx, topic, partition, noKey, noValue, payload); err != nil {
   725  				return err
   726  			}
   727  		}
   728  	}
   729  	return nil
   730  }
   731  
   732  func (s *sqlSink) emit(
   733  	ctx context.Context, topic string, partition int32, key, value, resolved []byte,
   734  ) error {
   735  	// Generate the message id on the client to match the guarantees of kafka
   736  	// (two messages are only guaranteed to keep their order if emitted from the
   737  	// same producer to the same partition).
   738  	messageID := builtins.GenerateUniqueInt(base.SQLInstanceID(partition))
   739  	s.rowBuf = append(s.rowBuf, topic, partition, messageID, key, value, resolved)
   740  	if len(s.rowBuf)/sqlSinkEmitCols >= sqlSinkRowBatchSize {
   741  		return s.Flush(ctx)
   742  	}
   743  	return nil
   744  }
   745  
   746  // Flush implements the Sink interface.
   747  func (s *sqlSink) Flush(ctx context.Context) error {
   748  	if len(s.rowBuf) == 0 {
   749  		return nil
   750  	}
   751  
   752  	var stmt strings.Builder
   753  	fmt.Fprintf(&stmt, sqlSinkEmitStmt, s.tableName)
   754  	for i := 0; i < len(s.rowBuf); i++ {
   755  		if i == 0 {
   756  			stmt.WriteString(` VALUES (`)
   757  		} else if i%sqlSinkEmitCols == 0 {
   758  			stmt.WriteString(`),(`)
   759  		} else {
   760  			stmt.WriteString(`,`)
   761  		}
   762  		fmt.Fprintf(&stmt, `$%d`, i+1)
   763  	}
   764  	stmt.WriteString(`)`)
   765  	_, err := s.db.Exec(stmt.String(), s.rowBuf...)
   766  	if err != nil {
   767  		return err
   768  	}
   769  	s.rowBuf = s.rowBuf[:0]
   770  	return nil
   771  }
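
// For a buffer holding two emitted rows (12 buffered values), the statement
// assembled above comes out as (editor's illustration):
//
//	INSERT INTO "sqlsink" (topic, partition, message_id, key, value, resolved) VALUES ($1,$2,$3,$4,$5,$6),($7,$8,$9,$10,$11,$12)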
   772  
   773  // Close implements the Sink interface.
   774  func (s *sqlSink) Close() error {
   775  	return s.db.Close()
   776  }
   777  
   778  // encDatumRowBuffer is a FIFO of `EncDatumRow`s.
   779  //
   780  // TODO(dan): There are some potential allocation savings here from reusing the same
   781  // backing array.
   782  type encDatumRowBuffer []sqlbase.EncDatumRow
   783  
   784  func (b *encDatumRowBuffer) IsEmpty() bool {
   785  	return b == nil || len(*b) == 0
   786  }
   787  func (b *encDatumRowBuffer) Push(r sqlbase.EncDatumRow) {
   788  	*b = append(*b, r)
   789  }
   790  func (b *encDatumRowBuffer) Pop() sqlbase.EncDatumRow {
   791  	ret := (*b)[0]
   792  	*b = (*b)[1:]
   793  	return ret
   794  }
   795  
   796  type bufferSink struct {
   797  	buf     encDatumRowBuffer
   798  	alloc   sqlbase.DatumAlloc
   799  	scratch bufalloc.ByteAllocator
   800  	closed  bool
   801  }
   802  
   803  // EmitRow implements the Sink interface.
   804  func (s *bufferSink) EmitRow(
   805  	_ context.Context, table *sqlbase.TableDescriptor, key, value []byte, _ hlc.Timestamp,
   806  ) error {
   807  	if s.closed {
   808  		return errors.New(`cannot EmitRow on a closed sink`)
   809  	}
   810  	topic := table.Name
   811  	s.buf.Push(sqlbase.EncDatumRow{
   812  		{Datum: tree.DNull}, // resolved span
   813  		{Datum: s.alloc.NewDString(tree.DString(topic))}, // topic
   814  		{Datum: s.alloc.NewDBytes(tree.DBytes(key))},     // key
   815  		{Datum: s.alloc.NewDBytes(tree.DBytes(value))},   // value
   816  	})
   817  	return nil
   818  }
   819  
   820  // EmitResolvedTimestamp implements the Sink interface.
   821  func (s *bufferSink) EmitResolvedTimestamp(
   822  	ctx context.Context, encoder Encoder, resolved hlc.Timestamp,
   823  ) error {
   824  	if s.closed {
   825  		return errors.New(`cannot EmitResolvedTimestamp on a closed sink`)
   826  	}
   827  	var noTopic string
   828  	payload, err := encoder.EncodeResolvedTimestamp(ctx, noTopic, resolved)
   829  	if err != nil {
   830  		return err
   831  	}
   832  	s.scratch, payload = s.scratch.Copy(payload, 0 /* extraCap */)
   833  	s.buf.Push(sqlbase.EncDatumRow{
   834  		{Datum: tree.DNull}, // resolved span
   835  		{Datum: tree.DNull}, // topic
   836  		{Datum: tree.DNull}, // key
   837  		{Datum: s.alloc.NewDBytes(tree.DBytes(payload))}, // value
   838  	})
   839  	return nil
   840  }
   841  
   842  // Flush implements the Sink interface.
   843  func (s *bufferSink) Flush(_ context.Context) error {
   844  	return nil
   845  }
   846  
   847  // Close implements the Sink interface.
   848  func (s *bufferSink) Close() error {
   849  	s.closed = true
   850  	return nil
   851  }
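
// The rows accumulated by bufferSink have a fixed four-column shape (editor's
// note): {resolved span, topic, key, value}, with NULLs filling the columns
// that do not apply. An emitted row therefore pops back out of the buffer as
//
//	{NULL, 'orders', <key bytes>, <value bytes>}
//
// while a resolved timestamp pops out as {NULL, NULL, NULL, <encoded payload>}.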