github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/pkg/config/sink.go (about) 1 // Copyright 2020 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package config 15 16 import ( 17 "fmt" 18 "net/url" 19 "strconv" 20 "strings" 21 "time" 22 23 "github.com/apache/pulsar-client-go/pulsar" 24 "github.com/aws/aws-sdk-go-v2/aws" 25 "github.com/pingcap/errors" 26 "github.com/pingcap/log" 27 cerror "github.com/pingcap/tiflow/pkg/errors" 28 "github.com/pingcap/tiflow/pkg/sink" 29 "github.com/pingcap/tiflow/pkg/util" 30 "go.uber.org/zap" 31 ) 32 33 const ( 34 // DefaultMaxMessageBytes sets the default value for max-message-bytes. 35 DefaultMaxMessageBytes = 10 * 1024 * 1024 // 10M 36 // DefaultAdvanceTimeoutInSec sets the default value for advance-timeout-in-sec. 37 DefaultAdvanceTimeoutInSec = uint(150) 38 39 // TxnAtomicityKey specifies the key of the transaction-atomicity in the SinkURI. 40 TxnAtomicityKey = "transaction-atomicity" 41 // defaultTxnAtomicity is the default atomicity level. 42 defaultTxnAtomicity = noneTxnAtomicity 43 // unknownTxnAtomicity is an invalid atomicity level and will be treated as 44 // defaultTxnAtomicity when initializing sink in processor. 45 unknownTxnAtomicity AtomicityLevel = "" 46 // noneTxnAtomicity means atomicity of transactions is not guaranteed 47 noneTxnAtomicity AtomicityLevel = "none" 48 // tableTxnAtomicity means atomicity of single table transactions is guaranteed. 49 tableTxnAtomicity AtomicityLevel = "table" 50 51 // Comma is a constant for ',' 52 Comma = "," 53 // CR is an abbreviation for carriage return 54 CR = '\r' 55 // LF is an abbreviation for line feed 56 LF = '\n' 57 // CRLF is an abbreviation for '\r\n' 58 CRLF = "\r\n" 59 // DoubleQuoteChar is a constant for '"' 60 DoubleQuoteChar = '"' 61 // Backslash is a constant for '\' 62 Backslash = '\\' 63 // NULL is a constant for '\N' 64 NULL = "\\N" 65 66 // MinFileIndexWidth is the minimum width of file index. 67 MinFileIndexWidth = 6 // enough for 2^19 files 68 // MaxFileIndexWidth is the maximum width of file index. 69 MaxFileIndexWidth = 20 // enough for 2^64 files 70 // DefaultFileIndexWidth is the default width of file index. 71 DefaultFileIndexWidth = MaxFileIndexWidth 72 73 // BinaryEncodingHex encodes binary data to hex string. 74 BinaryEncodingHex = "hex" 75 // BinaryEncodingBase64 encodes binary data to base64 string. 76 BinaryEncodingBase64 = "base64" 77 78 // DefaultPulsarProducerCacheSize is the default size of the cache for producers 79 // 10240 producers maybe cost 1.1G memory 80 DefaultPulsarProducerCacheSize = 10240 81 82 // DefaultEncoderGroupConcurrency is the default concurrency of encoder group. 83 DefaultEncoderGroupConcurrency = 32 84 85 // DefaultSendBootstrapIntervalInSec is the default interval to send bootstrap message. 86 DefaultSendBootstrapIntervalInSec = int64(120) 87 // DefaultSendBootstrapInMsgCount is the default number of messages to send bootstrap message. 88 DefaultSendBootstrapInMsgCount = int32(10000) 89 // DefaultSendBootstrapToAllPartition is the default value of 90 // whether to send bootstrap message to all partitions. 91 DefaultSendBootstrapToAllPartition = true 92 93 // DefaultMaxReconnectToPulsarBroker is the default max reconnect times to pulsar broker. 94 // The pulsar client uses an exponential backoff with jitter to reconnect to the broker. 95 // Based on test, when the max reconnect times is 3, 96 // the total time of reconnecting to brokers is about 30 seconds. 97 DefaultMaxReconnectToPulsarBroker = 3 98 ) 99 100 // AtomicityLevel represents the atomicity level of a changefeed. 101 type AtomicityLevel string 102 103 // ShouldSplitTxn returns whether the sink should split txn. 104 func (l AtomicityLevel) ShouldSplitTxn() bool { 105 if l == unknownTxnAtomicity { 106 l = defaultTxnAtomicity 107 } 108 return l == noneTxnAtomicity 109 } 110 111 func (l AtomicityLevel) validate(scheme string) error { 112 switch l { 113 case unknownTxnAtomicity: 114 case noneTxnAtomicity: 115 // Do nothing here to avoid modifying the persistence parameters. 116 case tableTxnAtomicity: 117 // MqSink only support `noneTxnAtomicity`. 118 if sink.IsMQScheme(scheme) { 119 errMsg := fmt.Sprintf("%s level atomicity is not supported by %s scheme", l, scheme) 120 return cerror.ErrSinkURIInvalid.GenWithStackByArgs(errMsg) 121 } 122 default: 123 errMsg := fmt.Sprintf("%s level atomicity is not supported by %s scheme", l, scheme) 124 return cerror.ErrSinkURIInvalid.GenWithStackByArgs(errMsg) 125 } 126 return nil 127 } 128 129 // SinkConfig represents sink config for a changefeed 130 type SinkConfig struct { 131 TxnAtomicity *AtomicityLevel `toml:"transaction-atomicity" json:"transaction-atomicity,omitempty"` 132 // Protocol is NOT available when the downstream is DB. 133 Protocol *string `toml:"protocol" json:"protocol,omitempty"` 134 135 // DispatchRules is only available when the downstream is MQ. 136 DispatchRules []*DispatchRule `toml:"dispatchers" json:"dispatchers,omitempty"` 137 // CSVConfig is only available when the downstream is Storage. 138 CSVConfig *CSVConfig `toml:"csv" json:"csv,omitempty"` 139 140 ColumnSelectors []*ColumnSelector `toml:"column-selectors" json:"column-selectors,omitempty"` 141 // SchemaRegistry is only available when the downstream is MQ using avro protocol. 142 SchemaRegistry *string `toml:"schema-registry" json:"schema-registry,omitempty"` 143 // EncoderConcurrency is only available when the downstream is MQ. 144 EncoderConcurrency *int `toml:"encoder-concurrency" json:"encoder-concurrency,omitempty"` 145 // Terminator is NOT available when the downstream is DB. 146 Terminator *string `toml:"terminator" json:"terminator,omitempty"` 147 // DateSeparator is only available when the downstream is Storage. 148 DateSeparator *string `toml:"date-separator" json:"date-separator,omitempty"` 149 // EnablePartitionSeparator is only available when the downstream is Storage. 150 EnablePartitionSeparator *bool `toml:"enable-partition-separator" json:"enable-partition-separator,omitempty"` 151 // FileIndexWidth is only available when the downstream is Storage 152 FileIndexWidth *int `toml:"file-index-digit,omitempty" json:"file-index-digit,omitempty"` 153 154 // EnableKafkaSinkV2 enabled then the kafka-go sink will be used. 155 // It is only available when the downstream is MQ. 156 EnableKafkaSinkV2 *bool `toml:"enable-kafka-sink-v2" json:"enable-kafka-sink-v2,omitempty"` 157 158 // OnlyOutputUpdatedColumns is only available when the downstream is MQ. 159 OnlyOutputUpdatedColumns *bool `toml:"only-output-updated-columns" json:"only-output-updated-columns,omitempty"` 160 161 // DeleteOnlyOutputHandleKeyColumns is only available when the downstream is MQ. 162 DeleteOnlyOutputHandleKeyColumns *bool `toml:"delete-only-output-handle-key-columns" json:"delete-only-output-handle-key-columns,omitempty"` 163 164 // ContentCompatible is only available when the downstream is MQ. 165 ContentCompatible *bool `toml:"content-compatible" json:"content-compatible,omitempty"` 166 167 // TiDBSourceID is the source ID of the upstream TiDB, 168 // which is used to set the `tidb_cdc_write_source` session variable. 169 // Note: This field is only used internally and only used in the MySQL sink. 170 TiDBSourceID uint64 `toml:"-" json:"-"` 171 // SafeMode is only available when the downstream is DB. 172 SafeMode *bool `toml:"safe-mode" json:"safe-mode,omitempty"` 173 KafkaConfig *KafkaConfig `toml:"kafka-config" json:"kafka-config,omitempty"` 174 PulsarConfig *PulsarConfig `toml:"pulsar-config" json:"pulsar-config,omitempty"` 175 MySQLConfig *MySQLConfig `toml:"mysql-config" json:"mysql-config,omitempty"` 176 CloudStorageConfig *CloudStorageConfig `toml:"cloud-storage-config" json:"cloud-storage-config,omitempty"` 177 178 // AdvanceTimeoutInSec is a duration in second. If a table sink progress hasn't been 179 // advanced for this given duration, the sink will be canceled and re-established. 180 AdvanceTimeoutInSec *uint `toml:"advance-timeout-in-sec" json:"advance-timeout-in-sec,omitempty"` 181 182 // Simple Protocol only config, use to control the behavior of sending bootstrap message. 183 // Note: When one of the following conditions is set to negative value, 184 // bootstrap sending function will be disabled. 185 // SendBootstrapIntervalInSec is the interval in seconds to send bootstrap message. 186 SendBootstrapIntervalInSec *int64 `toml:"send-bootstrap-interval-in-sec" json:"send-bootstrap-interval-in-sec,omitempty"` 187 // SendBootstrapInMsgCount means bootstrap messages are being sent every SendBootstrapInMsgCount row change messages. 188 SendBootstrapInMsgCount *int32 `toml:"send-bootstrap-in-msg-count" json:"send-bootstrap-in-msg-count,omitempty"` 189 // SendBootstrapToAllPartition determines whether to send bootstrap message to all partitions. 190 // If set to false, bootstrap message will only be sent to the first partition of each topic. 191 // Default value is true. 192 SendBootstrapToAllPartition *bool `toml:"send-bootstrap-to-all-partition" json:"send-bootstrap-to-all-partition,omitempty"` 193 194 // Debezium only. Whether schema should be excluded in the output. 195 DebeziumDisableSchema *bool `toml:"debezium-disable-schema" json:"debezium-disable-schema,omitempty"` 196 197 // OpenProtocol related configurations 198 OpenProtocol *OpenProtocolConfig `toml:"open" json:"open,omitempty"` 199 200 // DebeziumConfig related configurations 201 Debezium *DebeziumConfig `toml:"debezium" json:"debezium,omitempty"` 202 } 203 204 // MaskSensitiveData masks sensitive data in SinkConfig 205 func (s *SinkConfig) MaskSensitiveData() { 206 if s.SchemaRegistry != nil { 207 s.SchemaRegistry = aws.String(util.MaskSensitiveDataInURI(*s.SchemaRegistry)) 208 } 209 if s.KafkaConfig != nil { 210 s.KafkaConfig.MaskSensitiveData() 211 } 212 if s.PulsarConfig != nil { 213 s.PulsarConfig.MaskSensitiveData() 214 } 215 } 216 217 // ShouldSendBootstrapMsg returns whether the sink should send bootstrap message. 218 // Only enable bootstrap sending function for simple protocol 219 // and when both send-bootstrap-interval-in-sec and send-bootstrap-in-msg-count are > 0 220 func (s *SinkConfig) ShouldSendBootstrapMsg() bool { 221 if s == nil { 222 return false 223 } 224 protocol := util.GetOrZero(s.Protocol) 225 226 return protocol == ProtocolSimple.String() && 227 util.GetOrZero(s.SendBootstrapIntervalInSec) > 0 && 228 util.GetOrZero(s.SendBootstrapInMsgCount) > 0 229 } 230 231 // CSVConfig defines a series of configuration items for csv codec. 232 type CSVConfig struct { 233 // delimiter between fields, it can be 1 character or at most 2 characters 234 // It can not be CR or LF or contains CR or LF. 235 // It should have exclusive characters with quote. 236 Delimiter string `toml:"delimiter" json:"delimiter"` 237 // quoting character 238 Quote string `toml:"quote" json:"quote"` 239 // representation of null values 240 NullString string `toml:"null" json:"null"` 241 // whether to include commit ts 242 IncludeCommitTs bool `toml:"include-commit-ts" json:"include-commit-ts"` 243 // encoding method of binary type 244 BinaryEncodingMethod string `toml:"binary-encoding-method" json:"binary-encoding-method"` 245 // output old value 246 OutputOldValue bool `toml:"output-old-value" json:"output-old-value"` 247 // output handle key 248 OutputHandleKey bool `toml:"output-handle-key" json:"output-handle-key"` 249 } 250 251 func (c *CSVConfig) validateAndAdjust() error { 252 if c == nil { 253 return nil 254 } 255 256 // validate quote 257 if len(c.Quote) > 1 { 258 return cerror.WrapError(cerror.ErrSinkInvalidConfig, 259 errors.New("csv config quote contains more than one character")) 260 } 261 if len(c.Quote) == 1 { 262 quote := c.Quote[0] 263 if quote == CR || quote == LF { 264 return cerror.WrapError(cerror.ErrSinkInvalidConfig, 265 errors.New("csv config quote cannot be line break character")) 266 } 267 } 268 269 // validate delimiter 270 switch len(c.Delimiter) { 271 case 0: 272 return cerror.WrapError(cerror.ErrSinkInvalidConfig, 273 errors.New("csv config delimiter cannot be empty")) 274 case 1, 2, 3: 275 if strings.ContainsRune(c.Delimiter, CR) || strings.ContainsRune(c.Delimiter, LF) { 276 return cerror.WrapError(cerror.ErrSinkInvalidConfig, 277 errors.New("csv config delimiter contains line break characters")) 278 } 279 default: 280 return cerror.WrapError(cerror.ErrSinkInvalidConfig, 281 errors.New("csv config delimiter contains more than three characters, note that escape "+ 282 "sequences can only be used in double quotes in toml configuration items.")) 283 } 284 285 if len(c.Quote) > 0 { 286 for _, r := range c.Delimiter { 287 if strings.ContainsRune(c.Quote, r) { 288 return cerror.WrapError(cerror.ErrSinkInvalidConfig, 289 errors.New("csv config quote and delimiter has common characters which is not allowed")) 290 } 291 } 292 } 293 294 // validate binary encoding method 295 switch c.BinaryEncodingMethod { 296 case BinaryEncodingHex, BinaryEncodingBase64: 297 default: 298 return cerror.WrapError(cerror.ErrSinkInvalidConfig, 299 errors.New("csv config binary-encoding-method can only be hex or base64")) 300 } 301 302 return nil 303 } 304 305 // DateSeparator specifies the date separator in storage destination path 306 type DateSeparator int 307 308 // Enum types of DateSeparator 309 const ( 310 DateSeparatorNone DateSeparator = iota 311 DateSeparatorYear 312 DateSeparatorMonth 313 DateSeparatorDay 314 ) 315 316 // FromString converts the separator from string to DateSeperator enum type. 317 func (d *DateSeparator) FromString(separator string) error { 318 switch strings.ToLower(separator) { 319 case "none": 320 *d = DateSeparatorNone 321 case "year": 322 *d = DateSeparatorYear 323 case "month": 324 *d = DateSeparatorMonth 325 case "day": 326 *d = DateSeparatorDay 327 default: 328 return cerror.ErrStorageSinkInvalidDateSeparator.GenWithStackByArgs(separator) 329 } 330 331 return nil 332 } 333 334 // GetPattern returns the pattern of the date separator. 335 func (d DateSeparator) GetPattern() string { 336 switch d { 337 case DateSeparatorNone: 338 return "" 339 case DateSeparatorYear: 340 return `\d{4}` 341 case DateSeparatorMonth: 342 return `\d{4}-\d{2}` 343 case DateSeparatorDay: 344 return `\d{4}-\d{2}-\d{2}` 345 default: 346 return "" 347 } 348 } 349 350 func (d DateSeparator) String() string { 351 switch d { 352 case DateSeparatorNone: 353 return "none" 354 case DateSeparatorYear: 355 return "year" 356 case DateSeparatorMonth: 357 return "month" 358 case DateSeparatorDay: 359 return "day" 360 default: 361 return "unknown" 362 } 363 } 364 365 // DispatchRule represents partition rule for a table. 366 type DispatchRule struct { 367 Matcher []string `toml:"matcher" json:"matcher"` 368 // Deprecated, please use PartitionRule. 369 DispatcherRule string `toml:"dispatcher" json:"dispatcher"` 370 // PartitionRule is an alias added for DispatcherRule to mitigate confusions. 371 // In the future release, the DispatcherRule is expected to be removed . 372 PartitionRule string `toml:"partition" json:"partition"` 373 374 // IndexName is set when using index-value dispatcher with specified index. 375 IndexName string `toml:"index" json:"index"` 376 377 // Columns are set when using columns dispatcher. 378 Columns []string `toml:"columns" json:"columns"` 379 380 TopicRule string `toml:"topic" json:"topic"` 381 } 382 383 // ColumnSelector represents a column selector for a table. 384 type ColumnSelector struct { 385 Matcher []string `toml:"matcher" json:"matcher"` 386 Columns []string `toml:"columns" json:"columns"` 387 } 388 389 // CodecConfig represents a MQ codec configuration 390 type CodecConfig struct { 391 EnableTiDBExtension *bool `toml:"enable-tidb-extension" json:"enable-tidb-extension,omitempty"` 392 MaxBatchSize *int `toml:"max-batch-size" json:"max-batch-size,omitempty"` 393 AvroEnableWatermark *bool `toml:"avro-enable-watermark" json:"avro-enable-watermark"` 394 AvroDecimalHandlingMode *string `toml:"avro-decimal-handling-mode" json:"avro-decimal-handling-mode,omitempty"` 395 AvroBigintUnsignedHandlingMode *string `toml:"avro-bigint-unsigned-handling-mode" json:"avro-bigint-unsigned-handling-mode,omitempty"` 396 EncodingFormat *string `toml:"encoding-format" json:"encoding-format,omitempty"` 397 } 398 399 // KafkaConfig represents a kafka sink configuration 400 type KafkaConfig struct { 401 PartitionNum *int32 `toml:"partition-num" json:"partition-num,omitempty"` 402 ReplicationFactor *int16 `toml:"replication-factor" json:"replication-factor,omitempty"` 403 KafkaVersion *string `toml:"kafka-version" json:"kafka-version,omitempty"` 404 MaxMessageBytes *int `toml:"max-message-bytes" json:"max-message-bytes,omitempty"` 405 Compression *string `toml:"compression" json:"compression,omitempty"` 406 KafkaClientID *string `toml:"kafka-client-id" json:"kafka-client-id,omitempty"` 407 AutoCreateTopic *bool `toml:"auto-create-topic" json:"auto-create-topic,omitempty"` 408 DialTimeout *string `toml:"dial-timeout" json:"dial-timeout,omitempty"` 409 WriteTimeout *string `toml:"write-timeout" json:"write-timeout,omitempty"` 410 ReadTimeout *string `toml:"read-timeout" json:"read-timeout,omitempty"` 411 RequiredAcks *int `toml:"required-acks" json:"required-acks,omitempty"` 412 SASLUser *string `toml:"sasl-user" json:"sasl-user,omitempty"` 413 SASLPassword *string `toml:"sasl-password" json:"sasl-password,omitempty"` 414 SASLMechanism *string `toml:"sasl-mechanism" json:"sasl-mechanism,omitempty"` 415 SASLGssAPIAuthType *string `toml:"sasl-gssapi-auth-type" json:"sasl-gssapi-auth-type,omitempty"` 416 SASLGssAPIKeytabPath *string `toml:"sasl-gssapi-keytab-path" json:"sasl-gssapi-keytab-path,omitempty"` 417 SASLGssAPIKerberosConfigPath *string `toml:"sasl-gssapi-kerberos-config-path" json:"sasl-gssapi-kerberos-config-path,omitempty"` 418 SASLGssAPIServiceName *string `toml:"sasl-gssapi-service-name" json:"sasl-gssapi-service-name,omitempty"` 419 SASLGssAPIUser *string `toml:"sasl-gssapi-user" json:"sasl-gssapi-user,omitempty"` 420 SASLGssAPIPassword *string `toml:"sasl-gssapi-password" json:"sasl-gssapi-password,omitempty"` 421 SASLGssAPIRealm *string `toml:"sasl-gssapi-realm" json:"sasl-gssapi-realm,omitempty"` 422 SASLGssAPIDisablePafxfast *bool `toml:"sasl-gssapi-disable-pafxfast" json:"sasl-gssapi-disable-pafxfast,omitempty"` 423 SASLOAuthClientID *string `toml:"sasl-oauth-client-id" json:"sasl-oauth-client-id,omitempty"` 424 SASLOAuthClientSecret *string `toml:"sasl-oauth-client-secret" json:"sasl-oauth-client-secret,omitempty"` 425 SASLOAuthTokenURL *string `toml:"sasl-oauth-token-url" json:"sasl-oauth-token-url,omitempty"` 426 SASLOAuthScopes []string `toml:"sasl-oauth-scopes" json:"sasl-oauth-scopes,omitempty"` 427 SASLOAuthGrantType *string `toml:"sasl-oauth-grant-type" json:"sasl-oauth-grant-type,omitempty"` 428 SASLOAuthAudience *string `toml:"sasl-oauth-audience" json:"sasl-oauth-audience,omitempty"` 429 EnableTLS *bool `toml:"enable-tls" json:"enable-tls,omitempty"` 430 CA *string `toml:"ca" json:"ca,omitempty"` 431 Cert *string `toml:"cert" json:"cert,omitempty"` 432 Key *string `toml:"key" json:"key,omitempty"` 433 InsecureSkipVerify *bool `toml:"insecure-skip-verify" json:"insecure-skip-verify,omitempty"` 434 CodecConfig *CodecConfig `toml:"codec-config" json:"codec-config,omitempty"` 435 LargeMessageHandle *LargeMessageHandleConfig `toml:"large-message-handle" json:"large-message-handle,omitempty"` 436 GlueSchemaRegistryConfig *GlueSchemaRegistryConfig `toml:"glue-schema-registry-config" json:"glue-schema-registry-config"` 437 } 438 439 // MaskSensitiveData masks sensitive data in KafkaConfig 440 func (k *KafkaConfig) MaskSensitiveData() { 441 k.SASLPassword = aws.String("******") 442 k.SASLGssAPIPassword = aws.String("******") 443 k.SASLOAuthClientSecret = aws.String("******") 444 k.Key = aws.String("******") 445 if k.GlueSchemaRegistryConfig != nil { 446 k.GlueSchemaRegistryConfig.AccessKey = "******" 447 k.GlueSchemaRegistryConfig.Token = "******" 448 k.GlueSchemaRegistryConfig.SecretAccessKey = "******" 449 } 450 if k.SASLOAuthTokenURL != nil { 451 k.SASLOAuthTokenURL = aws.String(util.MaskSensitiveDataInURI(*k.SASLOAuthTokenURL)) 452 } 453 } 454 455 // PulsarCompressionType is the compression type for pulsar 456 type PulsarCompressionType string 457 458 // Value returns the pulsar compression type 459 func (p *PulsarCompressionType) Value() pulsar.CompressionType { 460 if p == nil { 461 return 0 462 } 463 switch strings.ToLower(string(*p)) { 464 case "lz4": 465 return pulsar.LZ4 466 case "zlib": 467 return pulsar.ZLib 468 case "zstd": 469 return pulsar.ZSTD 470 default: 471 return 0 // default is no compression 472 } 473 } 474 475 // TimeMill is the time in milliseconds 476 type TimeMill int 477 478 // Duration returns the time in seconds as a duration 479 func (t *TimeMill) Duration() time.Duration { 480 if t == nil { 481 return 0 482 } 483 return time.Duration(*t) * time.Millisecond 484 } 485 486 // NewTimeMill returns a new time in milliseconds 487 func NewTimeMill(x int) *TimeMill { 488 t := TimeMill(x) 489 return &t 490 } 491 492 // TimeSec is the time in seconds 493 type TimeSec int 494 495 // Duration returns the time in seconds as a duration 496 func (t *TimeSec) Duration() time.Duration { 497 if t == nil { 498 return 0 499 } 500 return time.Duration(*t) * time.Second 501 } 502 503 // NewTimeSec returns a new time in seconds 504 func NewTimeSec(x int) *TimeSec { 505 t := TimeSec(x) 506 return &t 507 } 508 509 // OAuth2 is the configuration for OAuth2 510 type OAuth2 struct { 511 // OAuth2IssuerURL the URL of the authorization server. 512 OAuth2IssuerURL string `toml:"oauth2-issuer-url" json:"oauth2-issuer-url,omitempty"` 513 // OAuth2Audience the URL of the resource server. 514 OAuth2Audience string `toml:"oauth2-audience" json:"oauth2-audience,omitempty"` 515 // OAuth2PrivateKey the private key used to sign the server. 516 OAuth2PrivateKey string `toml:"oauth2-private-key" json:"oauth2-private-key,omitempty"` 517 // OAuth2ClientID the client ID of the application. 518 OAuth2ClientID string `toml:"oauth2-client-id" json:"oauth2-client-id,omitempty"` 519 // OAuth2Scope scope 520 OAuth2Scope string `toml:"oauth2-scope" json:"oauth2-scope,omitempty"` 521 } 522 523 func (o *OAuth2) validate() (err error) { 524 if o == nil { 525 return nil 526 } 527 if len(o.OAuth2IssuerURL) == 0 || len(o.OAuth2ClientID) == 0 || len(o.OAuth2PrivateKey) == 0 || 528 len(o.OAuth2Audience) == 0 { 529 return fmt.Errorf("issuer-url and audience and private-key and client-id not be empty") 530 } 531 return nil 532 } 533 534 // PulsarConfig pulsar sink configuration 535 type PulsarConfig struct { 536 TLSKeyFilePath *string `toml:"tls-key-file-path" json:"tls-key-file-path,omitempty"` 537 TLSCertificateFile *string `toml:"tls-certificate-file" json:"tls-certificate-file,omitempty"` 538 TLSTrustCertsFilePath *string `toml:"tls-trust-certs-file-path" json:"tls-trust-certs-file-path,omitempty"` 539 540 // PulsarProducerCacheSize is the size of the cache of pulsar producers 541 PulsarProducerCacheSize *int32 `toml:"pulsar-producer-cache-size" json:"pulsar-producer-cache-size,omitempty"` 542 543 // PulsarVersion print the version of pulsar 544 PulsarVersion *string `toml:"pulsar-version" json:"pulsar-version,omitempty"` 545 546 // pulsar client compression 547 CompressionType *PulsarCompressionType `toml:"compression-type" json:"compression-type,omitempty"` 548 549 // AuthenticationToken the token for the Pulsar server 550 AuthenticationToken *string `toml:"authentication-token" json:"authentication-token,omitempty"` 551 552 // ConnectionTimeout Timeout for the establishment of a TCP connection (default: 5 seconds) 553 ConnectionTimeout *TimeSec `toml:"connection-timeout" json:"connection-timeout,omitempty"` 554 555 // Set the operation timeout (default: 30 seconds) 556 // Producer-create, subscribe and unsubscribe operations will be retried until this interval, after which the 557 // operation will be marked as failed 558 OperationTimeout *TimeSec `toml:"operation-timeout" json:"operation-timeout,omitempty"` 559 560 // BatchingMaxMessages specifies the maximum number of messages permitted in a batch. (default: 1000) 561 BatchingMaxMessages *uint `toml:"batching-max-messages" json:"batching-max-messages,omitempty"` 562 563 // BatchingMaxPublishDelay specifies the time period within which the messages sent will be batched (default: 10ms) 564 // if batch messages are enabled. If set to a non zero value, messages will be queued until this time 565 // interval or until 566 BatchingMaxPublishDelay *TimeMill `toml:"batching-max-publish-delay" json:"batching-max-publish-delay,omitempty"` 567 568 // SendTimeout specifies the timeout for a message that has not been acknowledged by the server since sent. 569 // Send and SendAsync returns an error after timeout. 570 // default: 30s 571 SendTimeout *TimeSec `toml:"send-timeout" json:"send-timeout,omitempty"` 572 573 // TokenFromFile Authentication from the file token, 574 // the path name of the file (the third priority authentication method) 575 TokenFromFile *string `toml:"token-from-file" json:"token-from-file,omitempty"` 576 577 // BasicUserName Account name for pulsar basic authentication (the second priority authentication method) 578 BasicUserName *string `toml:"basic-user-name" json:"basic-user-name,omitempty"` 579 // BasicPassword with account 580 BasicPassword *string `toml:"basic-password" json:"basic-password,omitempty"` 581 582 // AuthTLSCertificatePath create new pulsar authentication provider with specified TLS certificate and private key 583 AuthTLSCertificatePath *string `toml:"auth-tls-certificate-path" json:"auth-tls-certificate-path,omitempty"` 584 // AuthTLSPrivateKeyPath private key 585 AuthTLSPrivateKeyPath *string `toml:"auth-tls-private-key-path" json:"auth-tls-private-key-path,omitempty"` 586 587 // Oauth2 include oauth2-issuer-url oauth2-audience oauth2-private-key oauth2-client-id 588 // and 'type' always use 'client_credentials' 589 OAuth2 *OAuth2 `toml:"oauth2" json:"oauth2,omitempty"` 590 591 // BrokerURL is used to configure service brokerUrl for the Pulsar service. 592 // This parameter is a part of the `sink-uri`. Internal use only. 593 BrokerURL string `toml:"-" json:"-"` 594 // SinkURI is the parsed sinkURI. Internal use only. 595 SinkURI *url.URL `toml:"-" json:"-"` 596 } 597 598 // MaskSensitiveData masks sensitive data in PulsarConfig 599 func (c *PulsarConfig) MaskSensitiveData() { 600 if c.AuthenticationToken != nil { 601 c.AuthenticationToken = aws.String("******") 602 } 603 if c.BasicPassword != nil { 604 c.BasicPassword = aws.String("******") 605 } 606 if c.OAuth2 != nil { 607 c.OAuth2.OAuth2PrivateKey = "******" 608 } 609 } 610 611 // Check get broker url 612 func (c *PulsarConfig) validate() (err error) { 613 if c.OAuth2 != nil { 614 if err = c.OAuth2.validate(); err != nil { 615 return err 616 } 617 } 618 return nil 619 } 620 621 // GetDefaultTopicName get default topic name 622 func (c *PulsarConfig) GetDefaultTopicName() string { 623 topicName := c.SinkURI.Path 624 return topicName[1:] 625 } 626 627 // MySQLConfig represents a MySQL sink configuration 628 type MySQLConfig struct { 629 WorkerCount *int `toml:"worker-count" json:"worker-count,omitempty"` 630 MaxTxnRow *int `toml:"max-txn-row" json:"max-txn-row,omitempty"` 631 MaxMultiUpdateRowSize *int `toml:"max-multi-update-row-size" json:"max-multi-update-row-size,omitempty"` 632 MaxMultiUpdateRowCount *int `toml:"max-multi-update-row" json:"max-multi-update-row,omitempty"` 633 TiDBTxnMode *string `toml:"tidb-txn-mode" json:"tidb-txn-mode,omitempty"` 634 SSLCa *string `toml:"ssl-ca" json:"ssl-ca,omitempty"` 635 SSLCert *string `toml:"ssl-cert" json:"ssl-cert,omitempty"` 636 SSLKey *string `toml:"ssl-key" json:"ssl-key,omitempty"` 637 TimeZone *string `toml:"time-zone" json:"time-zone,omitempty"` 638 WriteTimeout *string `toml:"write-timeout" json:"write-timeout,omitempty"` 639 ReadTimeout *string `toml:"read-timeout" json:"read-timeout,omitempty"` 640 Timeout *string `toml:"timeout" json:"timeout,omitempty"` 641 EnableBatchDML *bool `toml:"enable-batch-dml" json:"enable-batch-dml,omitempty"` 642 EnableMultiStatement *bool `toml:"enable-multi-statement" json:"enable-multi-statement,omitempty"` 643 EnableCachePreparedStatement *bool `toml:"enable-cache-prepared-statement" json:"enable-cache-prepared-statement,omitempty"` 644 } 645 646 // CloudStorageConfig represents a cloud storage sink configuration 647 type CloudStorageConfig struct { 648 WorkerCount *int `toml:"worker-count" json:"worker-count,omitempty"` 649 FlushInterval *string `toml:"flush-interval" json:"flush-interval,omitempty"` 650 FileSize *int `toml:"file-size" json:"file-size,omitempty"` 651 652 OutputColumnID *bool `toml:"output-column-id" json:"output-column-id,omitempty"` 653 FileExpirationDays *int `toml:"file-expiration-days" json:"file-expiration-days,omitempty"` 654 FileCleanupCronSpec *string `toml:"file-cleanup-cron-spec" json:"file-cleanup-cron-spec,omitempty"` 655 FlushConcurrency *int `toml:"flush-concurrency" json:"flush-concurrency,omitempty"` 656 } 657 658 func (s *SinkConfig) validateAndAdjust(sinkURI *url.URL) error { 659 if err := s.validateAndAdjustSinkURI(sinkURI); err != nil { 660 return err 661 } 662 663 if sink.IsMySQLCompatibleScheme(sinkURI.Scheme) { 664 return nil 665 } 666 667 protocol, _ := ParseSinkProtocolFromString(util.GetOrZero(s.Protocol)) 668 669 if s.KafkaConfig != nil && s.KafkaConfig.LargeMessageHandle != nil { 670 var ( 671 enableTiDBExtension bool 672 err error 673 ) 674 if s := sinkURI.Query().Get("enable-tidb-extension"); s != "" { 675 enableTiDBExtension, err = strconv.ParseBool(s) 676 if err != nil { 677 return errors.Trace(err) 678 } 679 } 680 err = s.KafkaConfig.LargeMessageHandle.AdjustAndValidate(protocol, enableTiDBExtension) 681 if err != nil { 682 return err 683 } 684 } 685 686 if s.SchemaRegistry != nil && 687 (s.KafkaConfig != nil && s.KafkaConfig.GlueSchemaRegistryConfig != nil) { 688 return cerror.ErrInvalidReplicaConfig. 689 GenWithStackByArgs("schema-registry and glue-schema-registry-config" + 690 "cannot be set at the same time," + 691 "schema-registry is used by confluent schema registry, " + 692 "glue-schema-registry-config is used by aws glue schema registry") 693 } 694 695 if s.KafkaConfig != nil && s.KafkaConfig.GlueSchemaRegistryConfig != nil { 696 err := s.KafkaConfig.GlueSchemaRegistryConfig.Validate() 697 if err != nil { 698 return err 699 } 700 } 701 702 if sink.IsPulsarScheme(sinkURI.Scheme) && s.PulsarConfig == nil { 703 s.PulsarConfig = &PulsarConfig{ 704 SinkURI: sinkURI, 705 } 706 } 707 if s.PulsarConfig != nil { 708 if err := s.PulsarConfig.validate(); err != nil { 709 return err 710 } 711 } 712 713 for _, rule := range s.DispatchRules { 714 if rule.DispatcherRule != "" && rule.PartitionRule != "" { 715 log.Error("dispatcher and partition cannot be configured both", zap.Any("rule", rule)) 716 return cerror.WrapError(cerror.ErrSinkInvalidConfig, 717 errors.New(fmt.Sprintf("dispatcher and partition cannot be "+ 718 "configured both for rule:%v", rule))) 719 } 720 // After `validate()` is called, we only use PartitionRule to represent a partition 721 // dispatching rule. So when DispatcherRule is not empty, we assign its 722 // value to PartitionRule and clear itself. 723 if rule.DispatcherRule != "" { 724 rule.PartitionRule = rule.DispatcherRule 725 rule.DispatcherRule = "" 726 } 727 } 728 729 if util.GetOrZero(s.EncoderConcurrency) < 0 { 730 return cerror.ErrSinkInvalidConfig.GenWithStack( 731 "encoder-concurrency should greater than 0, but got %d", s.EncoderConcurrency) 732 } 733 734 // validate terminator 735 if s.Terminator == nil { 736 s.Terminator = util.AddressOf(CRLF) 737 } 738 739 if util.GetOrZero(s.DeleteOnlyOutputHandleKeyColumns) && protocol == ProtocolCsv { 740 return cerror.ErrSinkInvalidConfig.GenWithStack( 741 "CSV protocol always output all columns for the delete event, " + 742 "do not set `delete-only-output-handle-key-columns` to true") 743 } 744 745 // validate storage sink related config 746 if sinkURI != nil && sink.IsStorageScheme(sinkURI.Scheme) { 747 // validate date separator 748 if len(util.GetOrZero(s.DateSeparator)) > 0 { 749 var separator DateSeparator 750 if err := separator.FromString(util.GetOrZero(s.DateSeparator)); err != nil { 751 return cerror.WrapError(cerror.ErrSinkInvalidConfig, err) 752 } 753 } 754 755 // File index width should be in [minFileIndexWidth, maxFileIndexWidth]. 756 // In most scenarios, the user does not need to change this configuration, 757 // so the default value of this parameter is not set and just make silent 758 // adjustments here. 759 if util.GetOrZero(s.FileIndexWidth) < MinFileIndexWidth || 760 util.GetOrZero(s.FileIndexWidth) > MaxFileIndexWidth { 761 s.FileIndexWidth = util.AddressOf(DefaultFileIndexWidth) 762 } 763 764 if err := s.CSVConfig.validateAndAdjust(); err != nil { 765 return err 766 } 767 } 768 769 if util.GetOrZero(s.AdvanceTimeoutInSec) == 0 { 770 log.Warn(fmt.Sprintf("advance-timeout-in-sec is not set, use default value: %d seconds", DefaultAdvanceTimeoutInSec)) 771 s.AdvanceTimeoutInSec = util.AddressOf(DefaultAdvanceTimeoutInSec) 772 } 773 774 return nil 775 } 776 777 // validateAndAdjustSinkURI validate and adjust `Protocol` and `TxnAtomicity` by sinkURI. 778 func (s *SinkConfig) validateAndAdjustSinkURI(sinkURI *url.URL) error { 779 if sinkURI == nil { 780 return nil 781 } 782 783 if err := s.applyParameterBySinkURI(sinkURI); err != nil { 784 if !cerror.ErrIncompatibleSinkConfig.Equal(err) { 785 return err 786 } 787 // Ignore `ErrIncompatibleSinkConfig` here to: 788 // 1. Keep compatibility with old version. 789 // 2. Avoid throwing error when create changefeed. 790 log.Warn("sink-uri is not compatible with the sink config, "+ 791 "the configuration in sink URI will be used", zap.Error(err)) 792 } 793 794 // validate that TxnAtomicity is valid and compatible with the scheme. 795 if err := util.GetOrZero(s.TxnAtomicity).validate(sinkURI.Scheme); err != nil { 796 return err 797 } 798 799 // Adjust that protocol is compatible with the scheme. For testing purposes, 800 // any protocol should be legal for blackhole. 801 if sink.IsMQScheme(sinkURI.Scheme) || sink.IsStorageScheme(sinkURI.Scheme) { 802 _, err := ParseSinkProtocolFromString(util.GetOrZero(s.Protocol)) 803 if err != nil { 804 return err 805 } 806 } else if sink.IsMySQLCompatibleScheme(sinkURI.Scheme) && s.Protocol != nil { 807 return cerror.ErrSinkURIInvalid.GenWithStackByArgs(fmt.Sprintf("protocol %s "+ 808 "is incompatible with %s scheme", util.GetOrZero(s.Protocol), sinkURI.Scheme)) 809 } 810 811 log.Info("succeed to parse parameter from sink uri", 812 zap.String("protocol", util.GetOrZero(s.Protocol)), 813 zap.String("txnAtomicity", string(util.GetOrZero(s.TxnAtomicity)))) 814 return nil 815 } 816 817 // applyParameterBySinkURI parse sinkURI and set `Protocol` and `TxnAtomicity` to `SinkConfig`. 818 // Return: 819 // - ErrIncompatibleSinkConfig to terminate `updated` changefeed operation. 820 func (s *SinkConfig) applyParameterBySinkURI(sinkURI *url.URL) error { 821 if sinkURI == nil { 822 return nil 823 } 824 825 cfgInSinkURI := map[string]string{} 826 cfgInFile := map[string]string{} 827 params := sinkURI.Query() 828 829 txnAtomicityFromURI := AtomicityLevel(params.Get(TxnAtomicityKey)) 830 if txnAtomicityFromURI != unknownTxnAtomicity { 831 if util.GetOrZero(s.TxnAtomicity) != unknownTxnAtomicity && util.GetOrZero(s.TxnAtomicity) != txnAtomicityFromURI { 832 cfgInSinkURI[TxnAtomicityKey] = string(txnAtomicityFromURI) 833 cfgInFile[TxnAtomicityKey] = string(util.GetOrZero(s.TxnAtomicity)) 834 } 835 s.TxnAtomicity = util.AddressOf(txnAtomicityFromURI) 836 } 837 838 protocolFromURI := params.Get(ProtocolKey) 839 if protocolFromURI != "" { 840 if s.Protocol != nil && util.GetOrZero(s.Protocol) != protocolFromURI { 841 cfgInSinkURI[ProtocolKey] = protocolFromURI 842 cfgInFile[ProtocolKey] = util.GetOrZero(s.Protocol) 843 } 844 s.Protocol = util.AddressOf(protocolFromURI) 845 } 846 847 getError := func() error { 848 if len(cfgInSinkURI) != len(cfgInFile) { 849 log.Panic("inconsistent configuration items in sink uri and configuration file", 850 zap.Any("cfgInSinkURI", cfgInSinkURI), zap.Any("cfgInFile", cfgInFile)) 851 } 852 if len(cfgInSinkURI) == 0 && len(cfgInFile) == 0 { 853 return nil 854 } 855 getErrMsg := func(cfgIn map[string]string) string { 856 var errMsg strings.Builder 857 for k, v := range cfgIn { 858 errMsg.WriteString(fmt.Sprintf("%s=%s, ", k, v)) 859 } 860 return errMsg.String()[0 : errMsg.Len()-2] 861 } 862 return cerror.ErrIncompatibleSinkConfig.GenWithStackByArgs( 863 getErrMsg(cfgInSinkURI), getErrMsg(cfgInFile)) 864 } 865 return getError() 866 } 867 868 // CheckCompatibilityWithSinkURI check whether the sinkURI is compatible with the sink config. 869 func (s *SinkConfig) CheckCompatibilityWithSinkURI( 870 oldSinkConfig *SinkConfig, sinkURIStr string, 871 ) error { 872 sinkURI, err := url.Parse(sinkURIStr) 873 if err != nil { 874 return cerror.WrapError(cerror.ErrSinkURIInvalid, err) 875 } 876 877 cfgParamsChanged := s.Protocol != oldSinkConfig.Protocol || 878 s.TxnAtomicity != oldSinkConfig.TxnAtomicity 879 880 isURIParamsChanged := func(oldCfg SinkConfig) bool { 881 err := oldCfg.applyParameterBySinkURI(sinkURI) 882 return cerror.ErrIncompatibleSinkConfig.Equal(err) 883 } 884 uriParamsChanged := isURIParamsChanged(*oldSinkConfig) 885 886 if !uriParamsChanged && !cfgParamsChanged { 887 return nil 888 } 889 890 compatibilityError := s.applyParameterBySinkURI(sinkURI) 891 if uriParamsChanged && cerror.ErrIncompatibleSinkConfig.Equal(compatibilityError) { 892 // Ignore compatibility error if the sinkURI make such changes. 893 return nil 894 } 895 return compatibilityError 896 } 897 898 // GlueSchemaRegistryConfig represents a Glue Schema Registry configuration 899 type GlueSchemaRegistryConfig struct { 900 // Name of the schema registry 901 RegistryName string `toml:"registry-name" json:"registry-name"` 902 // Region of the schema registry 903 Region string `toml:"region" json:"region"` 904 // AccessKey of the schema registry 905 AccessKey string `toml:"access-key" json:"access-key,omitempty"` 906 // SecretAccessKey of the schema registry 907 SecretAccessKey string `toml:"secret-access-key" json:"secret-access-key,omitempty"` 908 Token string `toml:"token" json:"token,omitempty"` 909 } 910 911 // Validate the GlueSchemaRegistryConfig. 912 func (g *GlueSchemaRegistryConfig) Validate() error { 913 if g.RegistryName == "" { 914 return cerror.ErrInvalidGlueSchemaRegistryConfig. 915 GenWithStack("registry-name is empty, is must be set") 916 } 917 if g.Region == "" { 918 return cerror.ErrInvalidGlueSchemaRegistryConfig. 919 GenWithStack("region is empty, is must be set") 920 } 921 if g.AccessKey != "" && g.SecretAccessKey == "" { 922 return cerror.ErrInvalidGlueSchemaRegistryConfig. 923 GenWithStack("access-key is set, but access-key-secret is empty, they must be set together") 924 } 925 return nil 926 } 927 928 // NoCredentials returns true if no credentials are set. 929 func (g *GlueSchemaRegistryConfig) NoCredentials() bool { 930 return g.AccessKey == "" && g.SecretAccessKey == "" && g.Token == "" 931 } 932 933 // OpenProtocolConfig represents the configurations for open protocol encoding 934 type OpenProtocolConfig struct { 935 OutputOldValue bool `toml:"output-old-value" json:"output-old-value"` 936 } 937 938 // DebeziumConfig represents the configurations for debezium protocol encoding 939 type DebeziumConfig struct { 940 OutputOldValue bool `toml:"output-old-value" json:"output-old-value"` 941 }