github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/ccl/changefeedccl/changefeed_stmt.go

// Copyright 2018 The Cockroach Authors.
//
// Licensed as a CockroachDB Enterprise file under the Cockroach Community
// License (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
//     https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt

package changefeedccl

import (
	"context"
	"encoding/hex"
	"fmt"
	"math/rand"
	"net/url"
	"sort"
	"time"

	"github.com/cockroachdb/cockroach/pkg/ccl/backupccl"
	"github.com/cockroachdb/cockroach/pkg/ccl/changefeedccl/changefeedbase"
	"github.com/cockroachdb/cockroach/pkg/ccl/utilccl"
	"github.com/cockroachdb/cockroach/pkg/jobs"
	"github.com/cockroachdb/cockroach/pkg/jobs/jobspb"
	"github.com/cockroachdb/cockroach/pkg/jobs/jobsprotectedts"
	"github.com/cockroachdb/cockroach/pkg/keys"
	"github.com/cockroachdb/cockroach/pkg/kv"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/protectedts"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/server/telemetry"
	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
	"github.com/cockroachdb/cockroach/pkg/sql"
	"github.com/cockroachdb/cockroach/pkg/sql/flowinfra"
	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
	"github.com/cockroachdb/cockroach/pkg/sql/types"
	"github.com/cockroachdb/cockroach/pkg/storage/cloud"
	"github.com/cockroachdb/cockroach/pkg/util/hlc"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/cockroach/pkg/util/retry"
	"github.com/cockroachdb/cockroach/pkg/util/tracing"
	"github.com/cockroachdb/cockroach/pkg/util/uuid"
	"github.com/cockroachdb/errors"
	crdberrors "github.com/cockroachdb/errors"
)

func init() {
	sql.AddPlanHook(changefeedPlanHook)
	jobs.RegisterConstructor(
		jobspb.TypeChangefeed,
		func(job *jobs.Job, _ *cluster.Settings) jobs.Resumer {
			return &changefeedResumer{job: job}
		},
	)
}

// changefeedPlanHook implements sql.PlanHookFn.
func changefeedPlanHook(
	ctx context.Context, stmt tree.Statement, p sql.PlanHookState,
) (sql.PlanHookRowFn, sqlbase.ResultColumns, []sql.PlanNode, bool, error) {
	changefeedStmt, ok := stmt.(*tree.CreateChangefeed)
	if !ok {
		return nil, nil, nil, false, nil
	}

	var sinkURIFn func() (string, error)
	var header sqlbase.ResultColumns
	unspecifiedSink := changefeedStmt.SinkURI == nil
	avoidBuffering := false
	if unspecifiedSink {
		// An unspecified sink triggers a fairly radical change in behavior.
		// Instead of setting up a system.job to emit to a sink in the
		// background and returning immediately with the job ID, the `CREATE
		// CHANGEFEED` blocks forever and returns all changes as rows directly
		// over pgwire. The types of these rows are `(topic STRING, key BYTES,
		// value BYTES)` and they correspond exactly to what would be emitted to
		// a sink.
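		//
		// For illustration only (syntax per the documentation of this era, not
		// defined in this file; `foo` and the kafka URI are placeholders): a
		// sinkless feed streams rows back to the client, while a feed with a
		// sink returns a job ID instead:
		//
		//   EXPERIMENTAL CHANGEFEED FOR foo;
		//   CREATE CHANGEFEED FOR TABLE foo INTO 'kafka://host:9092';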
		sinkURIFn = func() (string, error) { return ``, nil }
		header = sqlbase.ResultColumns{
			{Name: "table", Typ: types.String},
			{Name: "key", Typ: types.Bytes},
			{Name: "value", Typ: types.Bytes},
		}
		avoidBuffering = true
	} else {
		var err error
		sinkURIFn, err = p.TypeAsString(ctx, changefeedStmt.SinkURI, `CREATE CHANGEFEED`)
		if err != nil {
			return nil, nil, nil, false, err
		}
		header = sqlbase.ResultColumns{
			{Name: "job_id", Typ: types.Int},
		}
	}

	optsFn, err := p.TypeAsStringOpts(ctx, changefeedStmt.Options, changefeedbase.ChangefeedOptionExpectValues)
	if err != nil {
		return nil, nil, nil, false, err
	}

	fn := func(ctx context.Context, _ []sql.PlanNode, resultsCh chan<- tree.Datums) error {
		ctx, span := tracing.ChildSpan(ctx, stmt.StatementTag())
		defer tracing.FinishSpan(span)

		if err := p.RequireAdminRole(ctx, "CREATE CHANGEFEED"); err != nil {
			return err
		}

		sinkURI, err := sinkURIFn()
		if err != nil {
			return err
		}
		if !unspecifiedSink && sinkURI == `` {
			// Error if someone specifies an INTO with the empty string. We've
			// already sent the wrong result column headers.
			return errors.New(`omit the SINK clause for inline results`)
		}

		opts, err := optsFn()
		if err != nil {
			return err
		}

		jobDescription, err := changefeedJobDescription(p, changefeedStmt, sinkURI, opts)
		if err != nil {
			return err
		}

		statementTime := hlc.Timestamp{
			WallTime: p.ExtendedEvalContext().GetStmtTimestamp().UnixNano(),
		}
		var initialHighWater hlc.Timestamp
		if cursor, ok := opts[changefeedbase.OptCursor]; ok {
			asOf := tree.AsOfClause{Expr: tree.NewStrVal(cursor)}
			var err error
			if initialHighWater, err = p.EvalAsOfTimestamp(ctx, asOf); err != nil {
				return err
			}
			statementTime = initialHighWater
		}

		// For now, disallow targeting a database or wildcard table selection.
		// Getting it right as tables enter and leave the set over time is
		// tricky.
		if len(changefeedStmt.Targets.Databases) > 0 {
			return errors.Errorf(`CHANGEFEED cannot target %s`,
				tree.AsString(&changefeedStmt.Targets))
		}
		for _, t := range changefeedStmt.Targets.Tables {
			p, err := t.NormalizeTablePattern()
			if err != nil {
				return err
			}
			if _, ok := p.(*tree.TableName); !ok {
				return errors.Errorf(`CHANGEFEED cannot target %s`, tree.AsString(t))
			}
		}

		// This grabs table descriptors once to get their ids.
		targetDescs, _, err := backupccl.ResolveTargetsToDescriptors(
			ctx, p, statementTime, changefeedStmt.Targets, tree.RequestedDescriptors)
		if err != nil {
			return err
		}
		targets := make(jobspb.ChangefeedTargets, len(targetDescs))
		for _, desc := range targetDescs {
			if tableDesc := desc.Table(hlc.Timestamp{}); tableDesc != nil {
				targets[tableDesc.ID] = jobspb.ChangefeedTarget{
					StatementTimeName: tableDesc.Name,
				}
				if err := validateChangefeedTable(targets, tableDesc); err != nil {
					return err
				}
			}
		}

		details := jobspb.ChangefeedDetails{
			Targets:       targets,
			Opts:          opts,
			SinkURI:       sinkURI,
			StatementTime: statementTime,
		}
		progress := jobspb.Progress{
			Progress: &jobspb.Progress_HighWater{},
			Details: &jobspb.Progress_Changefeed{
				Changefeed: &jobspb.ChangefeedProgress{},
			},
		}

		// TODO(dan): In an attempt to present the most helpful error message to the
		// user, the ordering requirements between all these usage validations have
		// become extremely fragile and non-obvious.
		//
		// - `validateDetails` has to run first to fill in defaults for `envelope`
		//   and `format` if the user didn't specify them.
		// - Then `getEncoder` is run to return any configuration errors.
		// - Then the changefeed is opted in to `OptKeyInValue` for any cloud
		//   storage sink. Kafka etc have a key and value field in each message but
		//   cloud storage sinks don't have anywhere to put the key. So if the key
		//   is not in the value, then for DELETEs there is no way to recover which
		//   key was deleted. We could make the user explicitly pass this option for
		//   every cloud storage sink and error if they don't, but that seems
		//   user-hostile for insufficient reason. We can't do this any earlier,
		//   because we might return errors about `key_in_value` being incompatible
		//   which is confusing when the user didn't type that option.
		// - Finally, we create a "canary" sink to test sink configuration and
		//   connectivity. This has to go last because it is strange to return sink
		//   connectivity errors before we've finished validating all the other
		//   options. We should probably split sink configuration checking and sink
		//   connectivity checking into separate methods.
		//
		// The only upside in all this nonsense is the tests are decent. I've tuned
		// this particular order simply by rearranging stuff until the changefeedccl
		// tests all pass.
		parsedSink, err := url.Parse(sinkURI)
		if err != nil {
			return err
		}
		if details, err = validateDetails(details); err != nil {
			return err
		}

		if _, err := getEncoder(details.Opts); err != nil {
			return err
		}
		if isCloudStorageSink(parsedSink) {
			details.Opts[changefeedbase.OptKeyInValue] = ``
		}

		// Feature telemetry
		telemetrySink := parsedSink.Scheme
		if telemetrySink == `` {
			telemetrySink = `sinkless`
		}
		telemetry.Count(`changefeed.create.sink.` + telemetrySink)
		telemetry.Count(`changefeed.create.format.` + details.Opts[changefeedbase.OptFormat])
		telemetry.CountBucketed(`changefeed.create.num_tables`, int64(len(targets)))

		if details.SinkURI == `` {
			err := distChangefeedFlow(ctx, p, 0 /* jobID */, details, progress, resultsCh)
			return MaybeStripRetryableErrorMarker(err)
		}

		settings := p.ExecCfg().Settings
		if err := utilccl.CheckEnterpriseEnabled(
			settings, p.ExecCfg().ClusterID(), p.ExecCfg().Organization(), "CHANGEFEED",
		); err != nil {
			return err
		}

		// In the case where a user is executing a CREATE CHANGEFEED and is still
		// waiting for the statement to return, we take the opportunity to ensure
		// that the user has not made any obvious errors when specifying the sink in
		// the CREATE CHANGEFEED statement. To do this, we create a "canary" sink,
		// which will be immediately closed, only to check for errors.
		{
			nodeID, err := p.ExtendedEvalContext().NodeID.OptionalNodeIDErr(48274)
			if err != nil {
				return err
			}
			var nilOracle timestampLowerBoundOracle
			canarySink, err := getSink(
				ctx, details.SinkURI, nodeID, details.Opts, details.Targets,
				settings, nilOracle, p.ExecCfg().DistSQLSrv.ExternalStorageFromURI,
			)
			if err != nil {
				return MaybeStripRetryableErrorMarker(err)
			}
			if err := canarySink.Close(); err != nil {
				return err
			}
		}

		// Make a channel for runChangefeedFlow to signal once everything has
		// been set up okay. This intentionally abuses what would normally be
		// hooked up to resultsCh to avoid a bunch of extra plumbing.
		startedCh := make(chan tree.Datums)

		// The below block creates the job and, if there's an initial scan, protects
		// the data required for that scan. We protect the data here rather than in
		// Resume to shorten the window that data may be GC'd. The protected
		// timestamps are created and removed during the execution of the changefeed
		// by the changeFrontier when checkpointing progress. Additionally, protected
		// timestamps are removed in OnFailOrCancel. See the comment on
		// changeFrontier.manageProtectedTimestamps for more details on the handling of
		// protected timestamps.
		var sj *jobs.StartableJob
		{
			var protectedTimestampID uuid.UUID
			var spansToProtect []roachpb.Span
			if hasInitialScan := initialScanFromOptions(details.Opts); hasInitialScan {
				protectedTimestampID = uuid.MakeV4()
				spansToProtect = makeSpansToProtect(details.Targets)
				progress.GetChangefeed().ProtectedTimestampRecord = protectedTimestampID
			}

			jr := jobs.Record{
				Description: jobDescription,
				Username:    p.User(),
				DescriptorIDs: func() (sqlDescIDs []sqlbase.ID) {
					for _, desc := range targetDescs {
						sqlDescIDs = append(sqlDescIDs, desc.GetID())
					}
					return sqlDescIDs
				}(),
				Details:  details,
				Progress: *progress.GetChangefeed(),
			}
			createJobAndProtectedTS := func(ctx context.Context, txn *kv.Txn) (err error) {
				sj, err = p.ExecCfg().JobRegistry.CreateStartableJobWithTxn(ctx, jr, txn, startedCh)
				if err != nil {
					return err
				}
				if protectedTimestampID == uuid.Nil {
					return nil
				}
				ptr := jobsprotectedts.MakeRecord(protectedTimestampID, *sj.ID(),
					statementTime, spansToProtect)
				return p.ExecCfg().ProtectedTimestampProvider.Protect(ctx, txn, ptr)
			}
			if err := p.ExecCfg().DB.Txn(ctx, createJobAndProtectedTS); err != nil {
				if sj != nil {
					if err := sj.CleanupOnRollback(ctx); err != nil {
						log.Warningf(ctx, "failed to cleanup aborted job: %v", err)
					}
				}
				return err
			}
			// If we created a protected timestamp for an initial scan, verify it.
			// Doing this synchronously here rather than asynchronously later provides
			// a nice UX win in the case that the data isn't actually available.
			if protectedTimestampID != uuid.Nil {
				if err := p.ExecCfg().ProtectedTimestampProvider.Verify(ctx, protectedTimestampID); err != nil {
					if cancelErr := sj.Cancel(ctx); cancelErr != nil {
						if ctx.Err() == nil {
							log.Warningf(ctx, "failed to cancel job: %v", cancelErr)
						}
					}
					return err
				}
			}
		}

		// Start the job and wait for it to signal on startedCh.
		errCh, err := sj.Start(ctx)
		if err != nil {
			return err
		}
		select {
		case <-ctx.Done():
			return ctx.Err()
		case err := <-errCh:
			return err
		case <-startedCh:
			// The feed set up without error, return control to the user.
		}
		resultsCh <- tree.Datums{
			tree.NewDInt(tree.DInt(*sj.ID())),
		}
		return nil
	}
	return fn, header, nil, avoidBuffering, nil
}

func changefeedJobDescription(
	p sql.PlanHookState, changefeed *tree.CreateChangefeed, sinkURI string, opts map[string]string,
) (string, error) {
	cleanedSinkURI, err := cloud.SanitizeExternalStorageURI(sinkURI, []string{changefeedbase.SinkParamSASLPassword})
	if err != nil {
		return "", err
	}
	c := &tree.CreateChangefeed{
		Targets: changefeed.Targets,
		SinkURI: tree.NewDString(cleanedSinkURI),
	}
	for k, v := range opts {
		opt := tree.KVOption{Key: tree.Name(k)}
		if len(v) > 0 {
			opt.Value = tree.NewDString(v)
		}
		c.Options = append(c.Options, opt)
	}
	sort.Slice(c.Options, func(i, j int) bool { return c.Options[i].Key < c.Options[j].Key })
	ann := p.ExtendedEvalContext().Annotations
	return tree.AsStringWithFQNames(c, ann), nil
}

func validateDetails(details jobspb.ChangefeedDetails) (jobspb.ChangefeedDetails, error) {
	if details.Opts == nil {
		// The proto MarshalTo method omits the Opts field if the map is empty.
		// So, if no options were specified by the user, Opts will be nil when
		// the job gets restarted.
		details.Opts = map[string]string{}
	}
	{
		const opt = changefeedbase.OptResolvedTimestamps
		if o, ok := details.Opts[opt]; ok && o != `` {
			if d, err := time.ParseDuration(o); err != nil {
				return jobspb.ChangefeedDetails{}, err
			} else if d < 0 {
				return jobspb.ChangefeedDetails{}, errors.Errorf(
					`negative durations are not accepted: %s='%s'`, opt, o)
			}
		}
	}
	{
		const opt = changefeedbase.OptSchemaChangeEvents
		switch v := changefeedbase.SchemaChangeEventClass(details.Opts[opt]); v {
		case ``, changefeedbase.OptSchemaChangeEventClassDefault:
			details.Opts[opt] = string(changefeedbase.OptSchemaChangeEventClassDefault)
		case changefeedbase.OptSchemaChangeEventClassColumnChange:
			// No-op
		default:
			return jobspb.ChangefeedDetails{}, errors.Errorf(
				`unknown %s: %s`, opt, v)
		}
	}
	{
		const opt = changefeedbase.OptSchemaChangePolicy
		switch v := changefeedbase.SchemaChangePolicy(details.Opts[opt]); v {
		case ``, changefeedbase.OptSchemaChangePolicyBackfill:
			details.Opts[opt] = string(changefeedbase.OptSchemaChangePolicyBackfill)
		case changefeedbase.OptSchemaChangePolicyNoBackfill:
			// No-op
		case changefeedbase.OptSchemaChangePolicyStop:
			// No-op
		default:
			return jobspb.ChangefeedDetails{}, errors.Errorf(
				`unknown %s: %s`, opt, v)
		}
	}
	{
		_, withInitialScan := details.Opts[changefeedbase.OptInitialScan]
		_, noInitialScan := details.Opts[changefeedbase.OptNoInitialScan]
		if withInitialScan && noInitialScan {
			return jobspb.ChangefeedDetails{}, errors.Errorf(
				`cannot specify both %s and %s`, changefeedbase.OptInitialScan,
				changefeedbase.OptNoInitialScan)
		}
	}
	{
		const opt = changefeedbase.OptEnvelope
		switch v := changefeedbase.EnvelopeType(details.Opts[opt]); v {
		case changefeedbase.OptEnvelopeRow, changefeedbase.OptEnvelopeDeprecatedRow:
			details.Opts[opt] = string(changefeedbase.OptEnvelopeRow)
		case changefeedbase.OptEnvelopeKeyOnly:
			details.Opts[opt] = string(changefeedbase.OptEnvelopeKeyOnly)
		case ``, changefeedbase.OptEnvelopeWrapped:
			details.Opts[opt] = string(changefeedbase.OptEnvelopeWrapped)
		default:
			return jobspb.ChangefeedDetails{}, errors.Errorf(
				`unknown %s: %s`, opt, v)
		}
	}
	{
		const opt = changefeedbase.OptFormat
		switch v := changefeedbase.FormatType(details.Opts[opt]); v {
		case ``, changefeedbase.OptFormatJSON:
			details.Opts[opt] = string(changefeedbase.OptFormatJSON)
		case changefeedbase.OptFormatAvro:
			// No-op.
		default:
			return jobspb.ChangefeedDetails{}, errors.Errorf(
				`unknown %s: %s`, opt, v)
		}
	}
	return details, nil
}

func validateChangefeedTable(
	targets jobspb.ChangefeedTargets, tableDesc *sqlbase.TableDescriptor,
) error {
	t, ok := targets[tableDesc.ID]
	if !ok {
		return errors.Errorf(`unwatched table: %s`, tableDesc.Name)
	}

	// Technically, the only non-user table known not to work is system.jobs
	// (which creates a cycle since the resolved timestamp high-water mark is
	// saved in it), but there are subtle differences in the way many of them
	// work and this will be under-tested, so disallow them all until demand
	// dictates.
	if tableDesc.ID < keys.MinUserDescID {
		return errors.Errorf(`CHANGEFEEDs are not supported on system tables`)
	}
	if tableDesc.IsView() {
		return errors.Errorf(`CHANGEFEED cannot target views: %s`, tableDesc.Name)
	}
	if tableDesc.IsVirtualTable() {
		return errors.Errorf(`CHANGEFEED cannot target virtual tables: %s`, tableDesc.Name)
	}
	if tableDesc.IsSequence() {
		return errors.Errorf(`CHANGEFEED cannot target sequences: %s`, tableDesc.Name)
	}
	if len(tableDesc.Families) != 1 {
		return errors.Errorf(
			`CHANGEFEEDs are currently supported on tables with exactly 1 column family: %s has %d`,
			tableDesc.Name, len(tableDesc.Families))
	}

	if tableDesc.State == sqlbase.TableDescriptor_DROP {
		return errors.Errorf(`"%s" was dropped or truncated`, t.StatementTimeName)
	}
	if tableDesc.Name != t.StatementTimeName {
		return errors.Errorf(`"%s" was renamed to "%s"`, t.StatementTimeName, tableDesc.Name)
	}

	// TODO(mrtracy): re-enable this when allow-backfill option is added.
	// if tableDesc.HasColumnBackfillMutation() {
	// 	return errors.Errorf(`CHANGEFEEDs cannot operate on tables being backfilled`)
	// }

	return nil
}

type changefeedResumer struct {
	job *jobs.Job
}

// generateChangefeedSessionID generates a unique string that is used to
// prevent overwriting of output files by the cloudStorageSink.
func generateChangefeedSessionID() string {
	// We read exactly 8 random bytes. 8 bytes should be enough because:
	// Consider that each new session for a changefeed job can occur at the
	// same highWater timestamp for its catch up scan. This session ID is
	// used to ensure that a session emitting files with the same timestamp
	// as the session before doesn't clobber existing files. Let's assume that
	// each of these runs for 0 seconds. Our node liveness duration is currently
	// 9 seconds, but let's go with a conservative duration of 1 second.
	// With 8 bytes using the rough approximation for the birthday problem
	// https://en.wikipedia.org/wiki/Birthday_problem#Square_approximation, we
	// will have a 50% chance of a single collision after sqrt(2^64) = 2^32
	// sessions. So if we start a new job every second, we get a coin flip chance of
	// single collision after 136 years. With this same approximation, we get
	// something like 220 days to have a 0.001% chance of a collision. In practice,
	// jobs are likely to run for longer and it's likely to take longer for
	// job adoption, so we should be good with 8 bytes. Similarly, it's clear that
	// 16 would be way overkill. 4 bytes gives us a 50% chance of collision after
	// 65K sessions at the same timestamp.
	const size = 8
	p := make([]byte, size)
	buf := make([]byte, hex.EncodedLen(size))
	rand.Read(p)
	hex.Encode(buf, p)
	return string(buf)
}

// Resume is part of the jobs.Resumer interface.
func (b *changefeedResumer) Resume(
	ctx context.Context, planHookState interface{}, startedCh chan<- tree.Datums,
) error {
	phs := planHookState.(sql.PlanHookState)
	execCfg := phs.ExecCfg()
	jobID := *b.job.ID()
	details := b.job.Details().(jobspb.ChangefeedDetails)
	progress := b.job.Progress()

	// We'd like to avoid failing a changefeed unnecessarily, so when an error
	// bubbles up to this level, we'd like to "retry" the flow if possible. This
	// could be because the sink is down or because a cockroach node has crashed
	// or for many other reasons.
	opts := retry.Options{
		InitialBackoff: 5 * time.Millisecond,
		Multiplier:     2,
		MaxBackoff:     10 * time.Second,
	}
	var err error
	for r := retry.StartWithCtx(ctx, opts); r.Next(); {
		if err = distChangefeedFlow(ctx, phs, jobID, details, progress, startedCh); err == nil {
			return nil
		}
		if !IsRetryableError(err) {
			if ctx.Err() != nil {
				return ctx.Err()
			}

			if flowinfra.IsFlowRetryableError(err) {
				// We don't want to retry flowinfra retryable errors in the retry loop above.
				// This error currently indicates that this node is being drained. As such,
				// retries will not help.
				// Instead, we want to make sure that the changefeed job is not marked failed
				// due to a transient, retryable error.
				err = jobs.NewRetryJobError(fmt.Sprintf("retryable flow error: %+v", err))
			}

			log.Warningf(ctx, `CHANGEFEED job %d returning with error: %+v`, jobID, err)
			return err
		}

		log.Warningf(ctx, `CHANGEFEED job %d encountered retryable error: %v`, jobID, err)
		if metrics, ok := execCfg.JobRegistry.MetricsStruct().Changefeed.(*Metrics); ok {
			metrics.ErrorRetries.Inc(1)
		}
		// Re-load the job in order to update our progress object, which may have
		// been updated by the changeFrontier processor since the flow started.
		reloadedJob, reloadErr := execCfg.JobRegistry.LoadJob(ctx, jobID)
		if reloadErr != nil {
			if ctx.Err() != nil {
				return ctx.Err()
			}
			log.Warningf(ctx, `CHANGEFEED job %d could not reload job progress; `+
				`continuing from last known high-water of %s: %v`,
				jobID, progress.GetHighWater(), reloadErr)
		} else {
			progress = reloadedJob.Progress()
		}

		// startedCh is normally used to signal back to the creator of the job that
		// the job has started; however, in this case nothing will ever receive
		// on the channel, causing the changefeed flow to block. Replace it with
		// a dummy channel.
		startedCh = make(chan tree.Datums, 1)
	}
	// We only hit this if `r.Next()` returns false, which right now only happens
	// on context cancellation.
	return errors.Wrap(err, `ran out of retries`)
}

// OnFailOrCancel is part of the jobs.Resumer interface.
func (b *changefeedResumer) OnFailOrCancel(ctx context.Context, planHookState interface{}) error {
	phs := planHookState.(sql.PlanHookState)
	execCfg := phs.ExecCfg()
	progress := b.job.Progress()
	b.maybeCleanUpProtectedTimestamp(ctx, execCfg.DB, execCfg.ProtectedTimestampProvider,
		progress.GetChangefeed().ProtectedTimestampRecord)
	return nil
}

// maybeCleanUpProtectedTimestamp tries to clean up a protected timestamp
// created by the changefeed.
func (b *changefeedResumer) maybeCleanUpProtectedTimestamp(
	ctx context.Context, db *kv.DB, pts protectedts.Storage, ptsID uuid.UUID,
) {
	if ptsID == uuid.Nil {
		return
	}
	if err := db.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
		return pts.Release(ctx, txn, ptsID)
	}); err != nil && !crdberrors.Is(err, protectedts.ErrNotExists) {
		// NB: The record should get cleaned up by the reconciliation loop.
		// No good reason to cause more trouble by returning an error here.
		// Log and move on.
		log.Warningf(ctx, "failed to remove protected timestamp record %v: %v", ptsID, err)
	}
}

var _ jobs.PauseRequester = (*changefeedResumer)(nil)

// OnPauseRequest implements jobs.PauseRequester. If this changefeed is being
// paused, we want to install a protected timestamp at the most recent high
// watermark if there isn't already one.
func (b *changefeedResumer) OnPauseRequest(
	ctx context.Context, planHookState interface{}, txn *kv.Txn, progress *jobspb.Progress,
) error {
	details := b.job.Details().(jobspb.ChangefeedDetails)
	if _, shouldPause := details.Opts[changefeedbase.OptProtectDataFromGCOnPause]; !shouldPause {
		return nil
	}

	cp := progress.GetChangefeed()

	// If we already have a protected timestamp record, keep it where it is.
	if cp.ProtectedTimestampRecord != uuid.Nil {
		return nil
	}

	resolved := progress.GetHighWater()
	if resolved == nil {
		// This should only happen if the job was created in a version that did not
		// use protected timestamps but has yet to checkpoint its high water.
		// Changefeeds from older versions didn't get protected timestamps so it's
		// fine to not protect this one. In newer versions changefeeds which perform
		// an initial scan at the statement time (and don't have an initial high
		// water) will have a protected timestamp.
		return nil
	}

	pts := planHookState.(sql.PlanHookState).ExecCfg().ProtectedTimestampProvider
	return createProtectedTimestampRecord(ctx, pts, txn, *b.job.ID(),
		details.Targets, *resolved, cp)
}