github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/ccl/changefeedccl/sink.go

// Copyright 2018 The Cockroach Authors.
//
// Licensed as a CockroachDB Enterprise file under the Cockroach Community
// License (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
//     https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt

package changefeedccl

import (
	"context"
	"crypto/tls"
	"crypto/x509"
	gosql "database/sql"
	"encoding/base64"
	"fmt"
	"hash"
	"hash/fnv"
	"net/url"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/Shopify/sarama"
	"github.com/cockroachdb/cockroach/pkg/base"
	"github.com/cockroachdb/cockroach/pkg/ccl/changefeedccl/changefeedbase"
	"github.com/cockroachdb/cockroach/pkg/jobs/jobspb"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
	"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
	"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror"
	"github.com/cockroachdb/cockroach/pkg/sql/sem/builtins"
	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
	"github.com/cockroachdb/cockroach/pkg/storage/cloud"
	"github.com/cockroachdb/cockroach/pkg/util/bufalloc"
	"github.com/cockroachdb/cockroach/pkg/util/hlc"
	"github.com/cockroachdb/cockroach/pkg/util/humanizeutil"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/cockroach/pkg/util/syncutil"
	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
	"github.com/cockroachdb/errors"
	"github.com/cockroachdb/logtags"
)

// Sink is an abstraction for anything that a changefeed may emit into.
type Sink interface {
	// EmitRow enqueues a row message for asynchronous delivery on the sink. An
	// error may be returned if a previously enqueued message has failed.
	EmitRow(
		ctx context.Context,
		table *sqlbase.TableDescriptor,
		key, value []byte,
		updated hlc.Timestamp,
	) error
	// EmitResolvedTimestamp enqueues a resolved timestamp message for
	// asynchronous delivery on every topic that has been seen by EmitRow. An
	// error may be returned if a previously enqueued message has failed.
	EmitResolvedTimestamp(ctx context.Context, encoder Encoder, resolved hlc.Timestamp) error
	// Flush blocks until every message enqueued by EmitRow and
	// EmitResolvedTimestamp has been acknowledged by the sink. If an error is
	// returned, no guarantees are given about which messages have been
	// delivered or not delivered.
	Flush(ctx context.Context) error
	// Close does not guarantee delivery of outstanding messages.
	Close() error
}
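
// The function below is an illustrative sketch, not part of the original file:
// it shows the calling pattern the Sink contract above implies. The helper
// name and the shape of the `rows` argument are made up for the example.
// EmitRow only enqueues, so delivery is confirmed only once Flush returns nil;
// Close gives no such guarantee.
func exampleEmitAndFlush(
	ctx context.Context,
	s Sink,
	table *sqlbase.TableDescriptor,
	rows map[string]string,
	ts hlc.Timestamp,
) error {
	for k, v := range rows {
		// An error here may belong to a previously enqueued message.
		if err := s.EmitRow(ctx, table, []byte(k), []byte(v), ts); err != nil {
			return err
		}
	}
	// Only after Flush returns nil is every message above known delivered.
	return s.Flush(ctx)
}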

func getSink(
	ctx context.Context,
	sinkURI string,
	nodeID roachpb.NodeID,
	opts map[string]string,
	targets jobspb.ChangefeedTargets,
	settings *cluster.Settings,
	timestampOracle timestampLowerBoundOracle,
	makeExternalStorageFromURI cloud.ExternalStorageFromURIFactory,
) (Sink, error) {
	u, err := url.Parse(sinkURI)
	if err != nil {
		return nil, err
	}
	q := u.Query()

	// Use a function here to delay creation of the sink until after we've done
	// all the parameter verification.
	var makeSink func() (Sink, error)
	switch {
	case u.Scheme == changefeedbase.SinkSchemeBuffer:
		makeSink = func() (Sink, error) { return &bufferSink{}, nil }
	case u.Scheme == changefeedbase.SinkSchemeKafka:
		var cfg kafkaSinkConfig
		cfg.kafkaTopicPrefix = q.Get(changefeedbase.SinkParamTopicPrefix)
		q.Del(changefeedbase.SinkParamTopicPrefix)
		if schemaTopic := q.Get(changefeedbase.SinkParamSchemaTopic); schemaTopic != `` {
			return nil, errors.Errorf(`%s is not yet supported`, changefeedbase.SinkParamSchemaTopic)
		}
		q.Del(changefeedbase.SinkParamSchemaTopic)
		if tlsBool := q.Get(changefeedbase.SinkParamTLSEnabled); tlsBool != `` {
			var err error
			if cfg.tlsEnabled, err = strconv.ParseBool(tlsBool); err != nil {
				return nil, errors.Errorf(`param %s must be a bool: %s`, changefeedbase.SinkParamTLSEnabled, err)
			}
		}
		q.Del(changefeedbase.SinkParamTLSEnabled)
		if caCertHex := q.Get(changefeedbase.SinkParamCACert); caCertHex != `` {
			// TODO(dan): There's a straightforward and unambiguous transformation
			// between the base 64 encoding defined in RFC 4648 and the URL variant
			// defined in the same RFC: simply replace all `+` with `-` and `/` with
			// `_`. Consider always doing this for the user and accepting either
			// variant.
			if cfg.caCert, err = base64.StdEncoding.DecodeString(caCertHex); err != nil {
				return nil, errors.Errorf(`param %s must be base 64 encoded: %s`, changefeedbase.SinkParamCACert, err)
			}
		}
		q.Del(changefeedbase.SinkParamCACert)
		if clientCertHex := q.Get(changefeedbase.SinkParamClientCert); clientCertHex != `` {
			if cfg.clientCert, err = base64.StdEncoding.DecodeString(clientCertHex); err != nil {
				return nil, errors.Errorf(`param %s must be base 64 encoded: %s`, changefeedbase.SinkParamClientCert, err)
			}
		}
		q.Del(changefeedbase.SinkParamClientCert)
		if clientKeyHex := q.Get(changefeedbase.SinkParamClientKey); clientKeyHex != `` {
			if cfg.clientKey, err = base64.StdEncoding.DecodeString(clientKeyHex); err != nil {
				return nil, errors.Errorf(`param %s must be base 64 encoded: %s`, changefeedbase.SinkParamClientKey, err)
			}
		}
		q.Del(changefeedbase.SinkParamClientKey)

		saslParam := q.Get(changefeedbase.SinkParamSASLEnabled)
		q.Del(changefeedbase.SinkParamSASLEnabled)
		if saslParam != `` {
			b, err := strconv.ParseBool(saslParam)
			if err != nil {
				return nil, errors.Wrapf(err, `param %s must be a bool:`, changefeedbase.SinkParamSASLEnabled)
			}
			cfg.saslEnabled = b
		}
		handshakeParam := q.Get(changefeedbase.SinkParamSASLHandshake)
		q.Del(changefeedbase.SinkParamSASLHandshake)
		if handshakeParam == `` {
			cfg.saslHandshake = true
		} else {
			if !cfg.saslEnabled {
				return nil, errors.Errorf(`%s must be enabled to configure SASL handshake behavior`, changefeedbase.SinkParamSASLEnabled)
			}
			b, err := strconv.ParseBool(handshakeParam)
			if err != nil {
				return nil, errors.Wrapf(err, `param %s must be a bool:`, changefeedbase.SinkParamSASLHandshake)
			}
			cfg.saslHandshake = b
		}
		cfg.saslUser = q.Get(changefeedbase.SinkParamSASLUser)
		q.Del(changefeedbase.SinkParamSASLUser)
		cfg.saslPassword = q.Get(changefeedbase.SinkParamSASLPassword)
		q.Del(changefeedbase.SinkParamSASLPassword)
		if cfg.saslEnabled {
			if cfg.saslUser == `` {
				return nil, errors.Errorf(`%s must be provided when SASL is enabled`, changefeedbase.SinkParamSASLUser)
			}
			if cfg.saslPassword == `` {
				return nil, errors.Errorf(`%s must be provided when SASL is enabled`, changefeedbase.SinkParamSASLPassword)
			}
		} else {
			if cfg.saslUser != `` {
				return nil, errors.Errorf(`%s must be enabled if a SASL user is provided`, changefeedbase.SinkParamSASLEnabled)
			}
			if cfg.saslPassword != `` {
				return nil, errors.Errorf(`%s must be enabled if a SASL password is provided`, changefeedbase.SinkParamSASLEnabled)
			}
		}

		makeSink = func() (Sink, error) {
			return makeKafkaSink(cfg, u.Host, targets)
		}
	case isCloudStorageSink(u):
		fileSizeParam := q.Get(changefeedbase.SinkParamFileSize)
		q.Del(changefeedbase.SinkParamFileSize)
		var fileSize int64 = 16 << 20 // 16MB
		if fileSizeParam != `` {
			if fileSize, err = humanizeutil.ParseBytes(fileSizeParam); err != nil {
				return nil, pgerror.Wrapf(err, pgcode.Syntax, `parsing %s`, fileSizeParam)
			}
		}
		u.Scheme = strings.TrimPrefix(u.Scheme, `experimental-`)
		// Transfer "ownership" of validating all remaining query parameters to
		// ExternalStorage.
		u.RawQuery = q.Encode()
		q = url.Values{}
		makeSink = func() (Sink, error) {
			return makeCloudStorageSink(
				ctx, u.String(), nodeID, fileSize, settings,
				opts, timestampOracle, makeExternalStorageFromURI,
			)
		}
	case u.Scheme == changefeedbase.SinkSchemeExperimentalSQL:
		// Swap the changefeed prefix for the sql connection one that sqlSink
		// expects.
		u.Scheme = `postgres`
		// TODO(dan): Make tableName configurable or based on the job ID or
		// something.
		tableName := `sqlsink`
		makeSink = func() (Sink, error) {
			return makeSQLSink(u.String(), tableName, targets)
		}
		// Remove parameters we know about for the unknown parameter check.
		q.Del(`sslcert`)
		q.Del(`sslkey`)
		q.Del(`sslmode`)
		q.Del(`sslrootcert`)
	default:
		return nil, errors.Errorf(`unsupported sink: %s`, u.Scheme)
	}

	for k := range q {
		return nil, errors.Errorf(`unknown sink query parameter: %s`, k)
	}

	s, err := makeSink()
	if err != nil {
		return nil, err
	}
	return s, nil
}
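
// Illustrative sketch, not part of the original file: assembling a Kafka sink
// URI of the shape getSink parses above. The parameter names come from the
// changefeedbase constants already used in getSink; the broker address and
// topic prefix are placeholder values.
func exampleKafkaSinkURI() string {
	q := url.Values{}
	q.Set(changefeedbase.SinkParamTopicPrefix, `crdb_`)
	q.Set(changefeedbase.SinkParamTLSEnabled, `true`)
	u := url.URL{
		Scheme:   changefeedbase.SinkSchemeKafka,
		Host:     `broker-1:9092`,
		RawQuery: q.Encode(),
	}
	return u.String()
}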

// errorWrapperSink delegates to another sink and marks all returned errors as
// retryable. During changefeed setup, we use the sink once without this to
// verify configuration, but in the steady state, no sink error should be
// terminal.
type errorWrapperSink struct {
	wrapped Sink
}

func (s errorWrapperSink) EmitRow(
	ctx context.Context, table *sqlbase.TableDescriptor, key, value []byte, updated hlc.Timestamp,
) error {
	if err := s.wrapped.EmitRow(ctx, table, key, value, updated); err != nil {
		return MarkRetryableError(err)
	}
	return nil
}

func (s errorWrapperSink) EmitResolvedTimestamp(
	ctx context.Context, encoder Encoder, resolved hlc.Timestamp,
) error {
	if err := s.wrapped.EmitResolvedTimestamp(ctx, encoder, resolved); err != nil {
		return MarkRetryableError(err)
	}
	return nil
}

func (s errorWrapperSink) Flush(ctx context.Context) error {
	if err := s.wrapped.Flush(ctx); err != nil {
		return MarkRetryableError(err)
	}
	return nil
}

func (s errorWrapperSink) Close() error {
	if err := s.wrapped.Close(); err != nil {
		return MarkRetryableError(err)
	}
	return nil
}
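
// Illustrative usage, not part of the original file: once the initial
// configuration check has exercised the raw sink, the steady-state changefeed
// would wrap it so every later sink error is marked retryable. The helper name
// is made up.
func wrapSinkForSteadyState(s Sink) Sink {
	return errorWrapperSink{wrapped: s}
}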

type kafkaLogAdapter struct {
	ctx context.Context
}

var _ sarama.StdLogger = (*kafkaLogAdapter)(nil)

func (l *kafkaLogAdapter) Print(v ...interface{}) {
	log.InfofDepth(l.ctx, 1, "", v...)
}
func (l *kafkaLogAdapter) Printf(format string, v ...interface{}) {
	log.InfofDepth(l.ctx, 1, format, v...)
}
func (l *kafkaLogAdapter) Println(v ...interface{}) {
	log.InfofDepth(l.ctx, 1, "", v...)
}

func init() {
	// We'd much prefer to make one of these per sink, so we can use the real
	// context, but quite unfortunately, sarama only has a global logger hook.
	ctx := context.Background()
	ctx = logtags.AddTag(ctx, "kafka-producer", nil)
	sarama.Logger = &kafkaLogAdapter{ctx: ctx}
}

type kafkaSinkConfig struct {
	kafkaTopicPrefix string
	tlsEnabled       bool
	caCert           []byte
	clientCert       []byte
	clientKey        []byte
	saslEnabled      bool
	saslHandshake    bool
	saslUser         string
	saslPassword     string
}

// kafkaSink emits to Kafka asynchronously. It is not concurrency-safe; all
// calls to Emit and Flush should be from the same goroutine.
type kafkaSink struct {
	cfg      kafkaSinkConfig
	client   sarama.Client
	producer sarama.AsyncProducer
	topics   map[string]struct{}

	lastMetadataRefresh time.Time

	stopWorkerCh chan struct{}
	worker       sync.WaitGroup
	scratch      bufalloc.ByteAllocator

	// Only synchronized between the client goroutine and the worker goroutine.
	mu struct {
		syncutil.Mutex
		inflight int64
		flushErr error
		flushCh  chan struct{}
	}
}

func makeKafkaSink(
	cfg kafkaSinkConfig, bootstrapServers string, targets jobspb.ChangefeedTargets,
) (Sink, error) {
	sink := &kafkaSink{cfg: cfg}
	sink.topics = make(map[string]struct{})
	for _, t := range targets {
		sink.topics[cfg.kafkaTopicPrefix+SQLNameToKafkaName(t.StatementTimeName)] = struct{}{}
	}

	config := sarama.NewConfig()
	config.ClientID = `CockroachDB`
	config.Producer.Return.Successes = true
	config.Producer.Partitioner = newChangefeedPartitioner

	if cfg.caCert != nil {
		if !cfg.tlsEnabled {
			return nil, errors.Errorf(`%s requires %s=true`, changefeedbase.SinkParamCACert, changefeedbase.SinkParamTLSEnabled)
		}
		caCertPool := x509.NewCertPool()
		caCertPool.AppendCertsFromPEM(cfg.caCert)
		config.Net.TLS.Config = &tls.Config{
			RootCAs: caCertPool,
		}
		config.Net.TLS.Enable = true
	} else if cfg.tlsEnabled {
		config.Net.TLS.Enable = true
	}

	if cfg.clientCert != nil {
		if !cfg.tlsEnabled {
			return nil, errors.Errorf(`%s requires %s=true`, changefeedbase.SinkParamClientCert, changefeedbase.SinkParamTLSEnabled)
		}
		if cfg.clientKey == nil {
			return nil, errors.Errorf(`%s requires %s to be set`, changefeedbase.SinkParamClientCert, changefeedbase.SinkParamClientKey)
		}
		cert, err := tls.X509KeyPair(cfg.clientCert, cfg.clientKey)
		if err != nil {
			return nil, errors.Errorf(`invalid client certificate data provided: %s`, err)
		}
		if config.Net.TLS.Config == nil {
			config.Net.TLS.Config = &tls.Config{}
		}
		config.Net.TLS.Config.Certificates = []tls.Certificate{cert}
	} else if cfg.clientKey != nil {
		return nil, errors.Errorf(`%s requires %s to be set`, changefeedbase.SinkParamClientKey, changefeedbase.SinkParamClientCert)
	}

	if cfg.saslEnabled {
		config.Net.SASL.Enable = true
		config.Net.SASL.Handshake = cfg.saslHandshake
		config.Net.SASL.User = cfg.saslUser
		config.Net.SASL.Password = cfg.saslPassword
	}

	// When we emit messages to sarama, they're placed in a queue (as any
	// reasonable kafka producer client does). When our sink's Flush is called,
	// we have to wait for all buffered and inflight requests to be sent and
	// then acknowledged. Quite unfortunately, we have no way to hint to the
	// producer that it should immediately send out whatever is buffered. This
	// configuration can have a dramatic impact on how quickly this happens
	// naturally (and some configurations will block forever!).
	//
	// We can configure the producer to send out its batches based on number of
	// messages and/or total buffered message size and/or time. If none of them
	// are set, it uses some defaults, but if any of the three are set, it does
	// no defaulting. Which means that if `Flush.Messages` is set to 10 and
	// nothing else is set, then 9/10 times `Flush` will block forever. We can
	// work around this by also setting `Flush.Frequency` but a cleaner way is
	// to set `Flush.Messages` to 1. In the steady state, this sends a request
	// with some messages, buffers any messages that come in while it is in
	// flight, then sends those out.
	config.Producer.Flush.Messages = 1

	// This works around what seems to be a bug in sarama where it isn't
	// computing the right value to compare against `Producer.MaxMessageBytes`
	// and the server sends it back with a "Message was too large, server
	// rejected it to avoid allocation" error. The other flush tunings are
	// hints, but this one is a hard limit, so it's useful here as a workaround.
	//
	// This workaround should probably be something like setting
	// `Producer.MaxMessageBytes` to 90% of its value for some headroom, but
	// this workaround is the one that's been running in roachtests and I'd want
	// to test this one more before changing it.
	config.Producer.Flush.MaxMessages = 1000

	// config.Producer.Flush.Messages is set to 1 so we don't need this, but
	// sarama prints scary things to the logs if we don't.
	config.Producer.Flush.Frequency = time.Hour

	var err error
	sink.client, err = sarama.NewClient(strings.Split(bootstrapServers, `,`), config)
	if err != nil {
		err = pgerror.Wrapf(err, pgcode.CannotConnectNow,
			`connecting to kafka: %s`, bootstrapServers)
		return nil, err
	}
	sink.producer, err = sarama.NewAsyncProducerFromClient(sink.client)
	if err != nil {
		err = pgerror.Wrapf(err, pgcode.CannotConnectNow,
			`connecting to kafka: %s`, bootstrapServers)
		return nil, err
	}

	sink.start()
	return sink, nil
}
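
// Illustrative alternative, not part of the original file: the tuning comment
// in makeKafkaSink notes that count-based batching only behaves if a
// time-based flush is configured alongside it, since sarama stops applying
// defaults once any Flush knob is set. A sketch of that alternative, with
// made-up numbers:
func exampleBatchedFlushConfig() *sarama.Config {
	config := sarama.NewConfig()
	config.Producer.Return.Successes = true
	// Send a batch once 10 messages are buffered, but never hold a partial
	// batch for more than a second.
	config.Producer.Flush.Messages = 10
	config.Producer.Flush.Frequency = time.Second
	return config
}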

func (s *kafkaSink) start() {
	s.stopWorkerCh = make(chan struct{})
	s.worker.Add(1)
	go s.workerLoop()
}

// Close implements the Sink interface.
func (s *kafkaSink) Close() error {
	close(s.stopWorkerCh)
	s.worker.Wait()

	// If we're shutting down, we don't care what happens to the outstanding
	// messages, so ignore this error.
	_ = s.producer.Close()
	// s.client is only nil in tests.
	if s.client != nil {
		return s.client.Close()
	}
	return nil
}

// EmitRow implements the Sink interface.
func (s *kafkaSink) EmitRow(
	ctx context.Context, table *sqlbase.TableDescriptor, key, value []byte, _ hlc.Timestamp,
) error {
	topic := s.cfg.kafkaTopicPrefix + SQLNameToKafkaName(table.Name)
	if _, ok := s.topics[topic]; !ok {
		return errors.Errorf(`cannot emit to undeclared topic: %s`, topic)
	}

	msg := &sarama.ProducerMessage{
		Topic: topic,
		Key:   sarama.ByteEncoder(key),
		Value: sarama.ByteEncoder(value),
	}
	return s.emitMessage(ctx, msg)
}

// EmitResolvedTimestamp implements the Sink interface.
func (s *kafkaSink) EmitResolvedTimestamp(
	ctx context.Context, encoder Encoder, resolved hlc.Timestamp,
) error {
	// Periodically ping sarama to refresh its metadata. This means talking to
	// zookeeper, so it shouldn't be done too often, but beyond that this
	// constant was picked pretty arbitrarily.
	//
	// TODO(dan): Add a test for this. We can't right now (2018-11-13) because
	// we'd need to bump sarama, but that's a bad idea while we're still
	// actively working on stability. At the same time, revisit this tuning.
	const metadataRefreshMinDuration = time.Minute
	if timeutil.Since(s.lastMetadataRefresh) > metadataRefreshMinDuration {
		topics := make([]string, 0, len(s.topics))
		for topic := range s.topics {
			topics = append(topics, topic)
		}
		if err := s.client.RefreshMetadata(topics...); err != nil {
			return err
		}
		s.lastMetadataRefresh = timeutil.Now()
	}

	for topic := range s.topics {
		payload, err := encoder.EncodeResolvedTimestamp(ctx, topic, resolved)
		if err != nil {
			return err
		}
		s.scratch, payload = s.scratch.Copy(payload, 0 /* extraCap */)

		// sarama caches this, which is why we have to periodically refresh the
		// metadata above. Staleness here does not impact correctness. Some new
		// partitions will miss this resolved timestamp, but they'll eventually
		// be picked up and get later ones.
		partitions, err := s.client.Partitions(topic)
		if err != nil {
			return err
		}
		for _, partition := range partitions {
			msg := &sarama.ProducerMessage{
				Topic:     topic,
				Partition: partition,
				Key:       nil,
				Value:     sarama.ByteEncoder(payload),
			}
			if err := s.emitMessage(ctx, msg); err != nil {
				return err
			}
		}
	}
	return nil
}

// Flush implements the Sink interface.
func (s *kafkaSink) Flush(ctx context.Context) error {
	flushCh := make(chan struct{}, 1)

	s.mu.Lock()
	inflight := s.mu.inflight
	flushErr := s.mu.flushErr
	s.mu.flushErr = nil
	immediateFlush := inflight == 0 || flushErr != nil
	if !immediateFlush {
		s.mu.flushCh = flushCh
	}
	s.mu.Unlock()

	if immediateFlush {
		return flushErr
	}

	if log.V(1) {
		log.Infof(ctx, "flush waiting for %d inflight messages", inflight)
	}
	select {
	case <-ctx.Done():
		return ctx.Err()
	case <-flushCh:
		s.mu.Lock()
		flushErr := s.mu.flushErr
		s.mu.flushErr = nil
		s.mu.Unlock()
		return flushErr
	}
}

func (s *kafkaSink) emitMessage(ctx context.Context, msg *sarama.ProducerMessage) error {
	s.mu.Lock()
	s.mu.inflight++
	inflight := s.mu.inflight
	s.mu.Unlock()

	select {
	case <-ctx.Done():
		return ctx.Err()
	case s.producer.Input() <- msg:
	}

	if log.V(2) {
		log.Infof(ctx, "emitted %d inflight records to kafka", inflight)
	}
	return nil
}

func (s *kafkaSink) workerLoop() {
	defer s.worker.Done()

	for {
		select {
		case <-s.stopWorkerCh:
			return
		case <-s.producer.Successes():
		case err := <-s.producer.Errors():
			s.mu.Lock()
			if s.mu.flushErr == nil {
				s.mu.flushErr = err
			}
			s.mu.Unlock()
		}

		s.mu.Lock()
		s.mu.inflight--
		if s.mu.inflight == 0 && s.mu.flushCh != nil {
			s.mu.flushCh <- struct{}{}
			s.mu.flushCh = nil
		}
		s.mu.Unlock()
	}
}

// changefeedPartitioner hashes row messages to partitions by their key and
// sends keyless messages (resolved timestamps) to the partition set explicitly
// on the message.
type changefeedPartitioner struct {
	hash sarama.Partitioner
}

var _ sarama.Partitioner = &changefeedPartitioner{}
var _ sarama.PartitionerConstructor = newChangefeedPartitioner

func newChangefeedPartitioner(topic string) sarama.Partitioner {
	return &changefeedPartitioner{
		hash: sarama.NewHashPartitioner(topic),
	}
}

func (p *changefeedPartitioner) RequiresConsistency() bool { return true }
func (p *changefeedPartitioner) Partition(
	message *sarama.ProducerMessage, numPartitions int32,
) (int32, error) {
	if message.Key == nil {
		return message.Partition, nil
	}
	return p.hash.Partition(message, numPartitions)
}

const (
	sqlSinkCreateTableStmt = `CREATE TABLE IF NOT EXISTS "%s" (
		topic STRING,
		partition INT,
		message_id INT,
		key BYTES, value BYTES,
		resolved BYTES,
		PRIMARY KEY (topic, partition, message_id)
	)`
	sqlSinkEmitStmt = `INSERT INTO "%s" (topic, partition, message_id, key, value, resolved)`
	sqlSinkEmitCols = 6
	// Some amount of batching to mirror a bit how kafkaSink works.
	sqlSinkRowBatchSize = 3
	// While sqlSink is only used for testing, hardcode the number of
	// partitions to something small but greater than 1.
	sqlSinkNumPartitions = 3
)

// sqlSink mirrors the semantics offered by kafkaSink as closely as possible,
// but writes to a SQL table (presumably in CockroachDB). Currently only for
// testing.
//
// Each emitted row or resolved timestamp is stored as a row in the table. Each
// table gets 3 partitions. Similar to kafkaSink, the order between two emits
// is only preserved if they are emitted by the same node and to the same
// partition.
type sqlSink struct {
	db *gosql.DB

	tableName string
	topics    map[string]struct{}
	hasher    hash.Hash32

	rowBuf  []interface{}
	scratch bufalloc.ByteAllocator
}

func makeSQLSink(uri, tableName string, targets jobspb.ChangefeedTargets) (*sqlSink, error) {
	if u, err := url.Parse(uri); err != nil {
		return nil, err
	} else if u.Path == `` {
		return nil, errors.Errorf(`must specify database`)
	}
	db, err := gosql.Open(`postgres`, uri)
	if err != nil {
		return nil, err
	}
	if _, err := db.Exec(fmt.Sprintf(sqlSinkCreateTableStmt, tableName)); err != nil {
		db.Close()
		return nil, err
	}

	s := &sqlSink{
		db:        db,
		tableName: tableName,
		topics:    make(map[string]struct{}),
		hasher:    fnv.New32a(),
	}
	for _, t := range targets {
		s.topics[t.StatementTimeName] = struct{}{}
	}
	return s, nil
}
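
// Illustrative only, not part of the original file: constructing the test-only
// SQL sink by hand. The connection string and database name are placeholders;
// `sqlsink` matches the table name getSink hardcodes for this scheme.
func exampleSQLSink(targets jobspb.ChangefeedTargets) (Sink, error) {
	return makeSQLSink(`postgres://root@localhost:26257/d?sslmode=disable`, `sqlsink`, targets)
}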

// EmitRow implements the Sink interface.
func (s *sqlSink) EmitRow(
	ctx context.Context, table *sqlbase.TableDescriptor, key, value []byte, _ hlc.Timestamp,
) error {
	topic := table.Name
	if _, ok := s.topics[topic]; !ok {
		return errors.Errorf(`cannot emit to undeclared topic: %s`, topic)
	}

	// Hashing logic copied from sarama.HashPartitioner.
	s.hasher.Reset()
	if _, err := s.hasher.Write(key); err != nil {
		return err
	}
	partition := int32(s.hasher.Sum32()) % sqlSinkNumPartitions
	if partition < 0 {
		partition = -partition
	}

	var noResolved []byte
	return s.emit(ctx, topic, partition, key, value, noResolved)
}

// EmitResolvedTimestamp implements the Sink interface.
func (s *sqlSink) EmitResolvedTimestamp(
	ctx context.Context, encoder Encoder, resolved hlc.Timestamp,
) error {
	var noKey, noValue []byte
	for topic := range s.topics {
		payload, err := encoder.EncodeResolvedTimestamp(ctx, topic, resolved)
		if err != nil {
			return err
		}
		s.scratch, payload = s.scratch.Copy(payload, 0 /* extraCap */)
		for partition := int32(0); partition < sqlSinkNumPartitions; partition++ {
			if err := s.emit(ctx, topic, partition, noKey, noValue, payload); err != nil {
				return err
			}
		}
	}
	return nil
}

func (s *sqlSink) emit(
	ctx context.Context, topic string, partition int32, key, value, resolved []byte,
) error {
	// Generate the message id on the client to match the guarantees of kafka
	// (two messages are only guaranteed to keep their order if emitted from the
	// same producer to the same partition).
	messageID := builtins.GenerateUniqueInt(base.SQLInstanceID(partition))
	s.rowBuf = append(s.rowBuf, topic, partition, messageID, key, value, resolved)
	if len(s.rowBuf)/sqlSinkEmitCols >= sqlSinkRowBatchSize {
		return s.Flush(ctx)
	}
	return nil
}

// Flush implements the Sink interface.
func (s *sqlSink) Flush(ctx context.Context) error {
	if len(s.rowBuf) == 0 {
		return nil
	}

	var stmt strings.Builder
	fmt.Fprintf(&stmt, sqlSinkEmitStmt, s.tableName)
	for i := 0; i < len(s.rowBuf); i++ {
		if i == 0 {
			stmt.WriteString(` VALUES (`)
		} else if i%sqlSinkEmitCols == 0 {
			stmt.WriteString(`),(`)
		} else {
			stmt.WriteString(`,`)
		}
		fmt.Fprintf(&stmt, `$%d`, i+1)
	}
	stmt.WriteString(`)`)
	_, err := s.db.Exec(stmt.String(), s.rowBuf...)
	if err != nil {
		return err
	}
	s.rowBuf = s.rowBuf[:0]
	return nil
}

// Close implements the Sink interface.
func (s *sqlSink) Close() error {
	return s.db.Close()
}
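
// Illustrative only, not part of the original file: for the default table name
// and a full batch of sqlSinkRowBatchSize (3) buffered rows, the statement
// Flush builds above is the string returned here, with one placeholder per
// buffered column.
func exampleSQLSinkFlushStmt() string {
	return `INSERT INTO "sqlsink" (topic, partition, message_id, key, value, resolved)` +
		` VALUES ($1,$2,$3,$4,$5,$6),($7,$8,$9,$10,$11,$12),($13,$14,$15,$16,$17,$18)`
}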

// encDatumRowBuffer is a FIFO of `EncDatumRow`s.
//
// TODO(dan): There's some potential allocation savings here by reusing the same
// backing array.
type encDatumRowBuffer []sqlbase.EncDatumRow

func (b *encDatumRowBuffer) IsEmpty() bool {
	return b == nil || len(*b) == 0
}
func (b *encDatumRowBuffer) Push(r sqlbase.EncDatumRow) {
	*b = append(*b, r)
}
func (b *encDatumRowBuffer) Pop() sqlbase.EncDatumRow {
	ret := (*b)[0]
	*b = (*b)[1:]
	return ret
}

type bufferSink struct {
	buf     encDatumRowBuffer
	alloc   sqlbase.DatumAlloc
	scratch bufalloc.ByteAllocator
	closed  bool
}

// EmitRow implements the Sink interface.
func (s *bufferSink) EmitRow(
	_ context.Context, table *sqlbase.TableDescriptor, key, value []byte, _ hlc.Timestamp,
) error {
	if s.closed {
		return errors.New(`cannot EmitRow on a closed sink`)
	}
	topic := table.Name
	s.buf.Push(sqlbase.EncDatumRow{
		{Datum: tree.DNull},                              // resolved span
		{Datum: s.alloc.NewDString(tree.DString(topic))}, // topic
		{Datum: s.alloc.NewDBytes(tree.DBytes(key))},     // key
		{Datum: s.alloc.NewDBytes(tree.DBytes(value))},   // value
	})
	return nil
}

// EmitResolvedTimestamp implements the Sink interface.
func (s *bufferSink) EmitResolvedTimestamp(
	ctx context.Context, encoder Encoder, resolved hlc.Timestamp,
) error {
	if s.closed {
		return errors.New(`cannot EmitResolvedTimestamp on a closed sink`)
	}
	var noTopic string
	payload, err := encoder.EncodeResolvedTimestamp(ctx, noTopic, resolved)
	if err != nil {
		return err
	}
	s.scratch, payload = s.scratch.Copy(payload, 0 /* extraCap */)
	s.buf.Push(sqlbase.EncDatumRow{
		{Datum: tree.DNull}, // resolved span
		{Datum: tree.DNull}, // topic
		{Datum: tree.DNull}, // key
		{Datum: s.alloc.NewDBytes(tree.DBytes(payload))}, // value
	})
	return nil
}

// Flush implements the Sink interface.
func (s *bufferSink) Flush(_ context.Context) error {
	return nil
}

// Close implements the Sink interface.
func (s *bufferSink) Close() error {
	s.closed = true
	return nil
}
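
// Illustrative only, not part of the original file: a consumer of bufferSink
// drains rows in FIFO order; each row carries the four columns pushed above
// (resolved span, topic, key, value).
func drainBufferSink(s *bufferSink) []sqlbase.EncDatumRow {
	var rows []sqlbase.EncDatumRow
	for !s.buf.IsEmpty() {
		rows = append(rows, s.buf.Pop())
	}
	return rows
}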