github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/stats/create_stats_job_test.go (about) 1 // Copyright 2019 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package stats_test 12 13 import ( 14 "context" 15 "fmt" 16 "testing" 17 "time" 18 19 "github.com/cockroachdb/cockroach/pkg/base" 20 "github.com/cockroachdb/cockroach/pkg/jobs" 21 "github.com/cockroachdb/cockroach/pkg/jobs/jobspb" 22 "github.com/cockroachdb/cockroach/pkg/kv/kvserver" 23 "github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverbase" 24 "github.com/cockroachdb/cockroach/pkg/roachpb" 25 "github.com/cockroachdb/cockroach/pkg/sql/row" 26 "github.com/cockroachdb/cockroach/pkg/sql/rowexec" 27 "github.com/cockroachdb/cockroach/pkg/sql/sqlbase" 28 "github.com/cockroachdb/cockroach/pkg/testutils" 29 "github.com/cockroachdb/cockroach/pkg/testutils/jobutils" 30 "github.com/cockroachdb/cockroach/pkg/testutils/sqlutils" 31 "github.com/cockroachdb/cockroach/pkg/testutils/testcluster" 32 "github.com/cockroachdb/cockroach/pkg/util/encoding" 33 "github.com/cockroachdb/cockroach/pkg/util/hlc" 34 "github.com/cockroachdb/cockroach/pkg/util/leaktest" 35 "github.com/cockroachdb/cockroach/pkg/util/protoutil" 36 "github.com/cockroachdb/cockroach/pkg/util/retry" 37 "github.com/cockroachdb/errors" 38 ) 39 40 // TestCreateStatsControlJob tests that PAUSE JOB, RESUME JOB, and CANCEL JOB 41 // work as intended on create statistics jobs. 42 func TestCreateStatsControlJob(t *testing.T) { 43 defer leaktest.AfterTest(t)() 44 45 defer func(oldInterval time.Duration) { 46 jobs.DefaultAdoptInterval = oldInterval 47 }(jobs.DefaultAdoptInterval) 48 jobs.DefaultAdoptInterval = 100 * time.Millisecond 49 50 // Test with 3 nodes and rowexec.SamplerProgressInterval=100 to ensure 51 // that progress metadata is sent correctly after every 100 input rows. 52 const nodes = 3 53 defer func(oldSamplerInterval int, oldSampleAgggregatorInterval time.Duration) { 54 rowexec.SamplerProgressInterval = oldSamplerInterval 55 rowexec.SampleAggregatorProgressInterval = oldSampleAgggregatorInterval 56 }(rowexec.SamplerProgressInterval, rowexec.SampleAggregatorProgressInterval) 57 rowexec.SamplerProgressInterval = 100 58 rowexec.SampleAggregatorProgressInterval = time.Millisecond 59 60 var allowRequest chan struct{} 61 62 var serverArgs base.TestServerArgs 63 params := base.TestClusterArgs{ServerArgs: serverArgs} 64 params.ServerArgs.Knobs.Store = &kvserver.StoreTestingKnobs{ 65 TestingRequestFilter: createStatsRequestFilter(&allowRequest), 66 } 67 68 ctx := context.Background() 69 tc := testcluster.StartTestCluster(t, nodes, params) 70 defer tc.Stopper().Stop(ctx) 71 sqlDB := sqlutils.MakeSQLRunner(tc.Conns[0]) 72 sqlDB.Exec(t, `CREATE DATABASE d`) 73 sqlDB.Exec(t, `CREATE TABLE d.t (x INT PRIMARY KEY)`) 74 sqlDB.Exec(t, `INSERT INTO d.t SELECT generate_series(1,1000)`) 75 76 t.Run("cancel", func(t *testing.T) { 77 // Test that CREATE STATISTICS can be canceled. 78 query := fmt.Sprintf(`CREATE STATISTICS s1 FROM d.t`) 79 80 if _, err := jobutils.RunJob( 81 t, sqlDB, &allowRequest, []string{"cancel"}, query, 82 ); err == nil { 83 t.Fatal("expected an error") 84 } 85 86 // There should be no results here. 87 sqlDB.CheckQueryResults(t, 88 `SELECT statistics_name, column_names, row_count FROM [SHOW STATISTICS FOR TABLE d.t]`, 89 [][]string{}) 90 }) 91 92 t.Run("pause", func(t *testing.T) { 93 // Test that CREATE STATISTICS can be paused and resumed. 94 query := fmt.Sprintf(`CREATE STATISTICS s2 FROM d.t`) 95 96 jobID, err := jobutils.RunJob( 97 t, sqlDB, &allowRequest, []string{"PAUSE"}, query, 98 ) 99 if !testutils.IsError(err, "pause") && !testutils.IsError(err, "liveness") { 100 t.Fatalf("unexpected: %v", err) 101 } 102 103 // There should be no results here. 104 sqlDB.CheckQueryResults(t, 105 `SELECT statistics_name, column_names, row_count FROM [SHOW STATISTICS FOR TABLE d.t]`, 106 [][]string{}) 107 opts := retry.Options{ 108 InitialBackoff: 1 * time.Millisecond, 109 MaxBackoff: time.Second, 110 Multiplier: 2, 111 } 112 if err := retry.WithMaxAttempts(context.Background(), opts, 10, func() error { 113 _, err := sqlDB.DB.ExecContext(context.Background(), `RESUME JOB $1`, jobID) 114 return err 115 }); err != nil { 116 t.Fatal(err) 117 } 118 119 jobutils.WaitForJob(t, sqlDB, jobID) 120 121 // Now the job should have succeeded in producing stats. 122 sqlDB.CheckQueryResults(t, 123 `SELECT statistics_name, column_names, row_count FROM [SHOW STATISTICS FOR TABLE d.t]`, 124 [][]string{ 125 {"s2", "{x}", "1000"}, 126 }) 127 }) 128 } 129 130 // TestCreateStatsLivenessWithRestart tests that a node liveness transition 131 // during CREATE STATISTICS correctly resumes after the node executing the job 132 // becomes non-live (from the perspective of the jobs registry). 133 func TestCreateStatsLivenessWithRestart(t *testing.T) { 134 defer leaktest.AfterTest(t)() 135 136 defer func(oldAdoptInterval, oldCancelInterval time.Duration) { 137 jobs.DefaultAdoptInterval = oldAdoptInterval 138 jobs.DefaultCancelInterval = oldCancelInterval 139 }(jobs.DefaultAdoptInterval, jobs.DefaultCancelInterval) 140 jobs.DefaultAdoptInterval = 100 * time.Millisecond 141 jobs.DefaultCancelInterval = 100 * time.Millisecond 142 143 const nodes = 1 144 nl := jobs.NewFakeNodeLiveness(nodes) 145 serverArgs := base.TestServerArgs{ 146 Knobs: base.TestingKnobs{ 147 RegistryLiveness: nl, 148 }, 149 } 150 151 var allowRequest chan struct{} 152 params := base.TestClusterArgs{ServerArgs: serverArgs} 153 params.ServerArgs.Knobs.Store = &kvserver.StoreTestingKnobs{ 154 TestingRequestFilter: createStatsRequestFilter(&allowRequest), 155 } 156 157 ctx := context.Background() 158 tc := testcluster.StartTestCluster(t, nodes, params) 159 defer tc.Stopper().Stop(ctx) 160 conn := tc.Conns[0] 161 sqlDB := sqlutils.MakeSQLRunner(conn) 162 163 sqlDB.Exec(t, `CREATE DATABASE liveness`) 164 sqlDB.Exec(t, `CREATE TABLE liveness.t (i INT8 PRIMARY KEY)`) 165 sqlDB.Exec(t, `INSERT INTO liveness.t SELECT generate_series(1,1000)`) 166 167 const query = `CREATE STATISTICS s1 FROM liveness.t` 168 169 // Start a CREATE STATISTICS run and wait until it's done one scan. 170 allowRequest = make(chan struct{}) 171 errCh := make(chan error) 172 go func() { 173 _, err := conn.Exec(query) 174 errCh <- err 175 }() 176 select { 177 case allowRequest <- struct{}{}: 178 case err := <-errCh: 179 t.Fatal(err) 180 } 181 182 // Fetch the new job ID and lease since we know it's running now. 183 var jobID int64 184 originalLease := &jobspb.Progress{} 185 { 186 var expectedLeaseBytes []byte 187 sqlDB.QueryRow( 188 t, `SELECT id, progress FROM system.jobs ORDER BY created DESC LIMIT 1`, 189 ).Scan(&jobID, &expectedLeaseBytes) 190 if err := protoutil.Unmarshal(expectedLeaseBytes, originalLease); err != nil { 191 t.Fatal(err) 192 } 193 } 194 195 // Make the node non-live and wait for cancellation. 196 nl.FakeSetExpiration(1, hlc.MinTimestamp) 197 // Wait for the registry cancel loop to run and cancel the job. 198 <-nl.SelfCalledCh 199 <-nl.SelfCalledCh 200 close(allowRequest) 201 err := <-errCh 202 if !testutils.IsError(err, "job .*: node liveness error") { 203 t.Fatalf("unexpected: %v", err) 204 } 205 206 // Ensure that complete progress has not been recorded. 207 partialProgress := jobutils.GetJobProgress(t, sqlDB, jobID) 208 if partialProgress.Progress != nil && 209 partialProgress.Progress.(*jobspb.Progress_FractionCompleted).FractionCompleted == 1 { 210 t.Fatal("create stats should not have recorded progress") 211 } 212 213 // Make the node live again. 214 nl.FakeSetExpiration(1, hlc.MaxTimestamp) 215 216 // The registry should now adopt the job and resume it. 217 jobutils.WaitForJob(t, sqlDB, jobID) 218 219 // Verify that the job lease was updated. 220 rescheduledProgress := jobutils.GetJobProgress(t, sqlDB, jobID) 221 if rescheduledProgress.ModifiedMicros <= originalLease.ModifiedMicros { 222 t.Fatalf("expecting rescheduled job to have a later modification time: %d vs %d", 223 rescheduledProgress.ModifiedMicros, originalLease.ModifiedMicros) 224 } 225 226 // Verify that progress is now recorded. 227 if rescheduledProgress.Progress.(*jobspb.Progress_FractionCompleted).FractionCompleted != 1 { 228 t.Fatal("create stats should have recorded progress") 229 } 230 231 // Now the job should have succeeded in producing stats. 232 sqlDB.CheckQueryResults(t, 233 `SELECT statistics_name, column_names, row_count FROM [SHOW STATISTICS FOR TABLE liveness.t]`, 234 [][]string{ 235 {"s1", "{i}", "1000"}, 236 }) 237 } 238 239 // TestCreateStatsLivenessWithLeniency tests that a temporary node liveness 240 // transition during CREATE STATISTICS doesn't cancel the job, but allows the 241 // owning node to continue processing. 242 func TestCreateStatsLivenessWithLeniency(t *testing.T) { 243 defer leaktest.AfterTest(t)() 244 245 defer func(oldAdoptInterval, oldCancelInterval time.Duration) { 246 jobs.DefaultAdoptInterval = oldAdoptInterval 247 jobs.DefaultCancelInterval = oldCancelInterval 248 }(jobs.DefaultAdoptInterval, jobs.DefaultCancelInterval) 249 jobs.DefaultAdoptInterval = 100 * time.Millisecond 250 jobs.DefaultCancelInterval = 100 * time.Millisecond 251 252 const nodes = 1 253 nl := jobs.NewFakeNodeLiveness(nodes) 254 serverArgs := base.TestServerArgs{ 255 Knobs: base.TestingKnobs{ 256 RegistryLiveness: nl, 257 }, 258 } 259 260 var allowRequest chan struct{} 261 params := base.TestClusterArgs{ServerArgs: serverArgs} 262 params.ServerArgs.Knobs.Store = &kvserver.StoreTestingKnobs{ 263 TestingRequestFilter: createStatsRequestFilter(&allowRequest), 264 } 265 266 ctx := context.Background() 267 tc := testcluster.StartTestCluster(t, nodes, params) 268 defer tc.Stopper().Stop(ctx) 269 conn := tc.Conns[0] 270 sqlDB := sqlutils.MakeSQLRunner(conn) 271 272 // We want to know exactly how much leniency is configured. 273 sqlDB.Exec(t, `SET CLUSTER SETTING jobs.registry.leniency = '1m'`) 274 sqlDB.Exec(t, `CREATE DATABASE liveness`) 275 sqlDB.Exec(t, `CREATE TABLE liveness.t (i INT8 PRIMARY KEY)`) 276 sqlDB.Exec(t, `INSERT INTO liveness.t SELECT generate_series(1,1000)`) 277 278 const query = `CREATE STATISTICS s1 FROM liveness.t` 279 280 // Start a CREATE STATISTICS run and wait until it's done one scan. 281 allowRequest = make(chan struct{}) 282 errCh := make(chan error) 283 go func() { 284 _, err := conn.Exec(query) 285 errCh <- err 286 }() 287 select { 288 case allowRequest <- struct{}{}: 289 case err := <-errCh: 290 t.Fatal(err) 291 } 292 293 // Fetch the new job ID and lease since we know it's running now. 294 var jobID int64 295 originalLease := &jobspb.Payload{} 296 { 297 var expectedLeaseBytes []byte 298 sqlDB.QueryRow( 299 t, `SELECT id, payload FROM system.jobs ORDER BY created DESC LIMIT 1`, 300 ).Scan(&jobID, &expectedLeaseBytes) 301 if err := protoutil.Unmarshal(expectedLeaseBytes, originalLease); err != nil { 302 t.Fatal(err) 303 } 304 } 305 306 // Make the node slightly tardy. 307 nl.FakeSetExpiration(1, hlc.Timestamp{ 308 WallTime: hlc.UnixNano() - (15 * time.Second).Nanoseconds(), 309 }) 310 311 // Wait for the registry cancel loop to run and not cancel the job. 312 <-nl.SelfCalledCh 313 <-nl.SelfCalledCh 314 close(allowRequest) 315 316 // Set the node to be fully live again. This prevents the registry 317 // from canceling all of the jobs if the test node is saturated 318 // and the create stats runs slowly. 319 nl.FakeSetExpiration(1, hlc.MaxTimestamp) 320 321 // Verify that the client didn't see anything amiss. 322 if err := <-errCh; err != nil { 323 t.Fatalf("create stats job should have completed: %s", err) 324 } 325 326 // The job should have completed normally. 327 jobutils.WaitForJob(t, sqlDB, jobID) 328 } 329 330 func TestAtMostOneRunningCreateStats(t *testing.T) { 331 defer leaktest.AfterTest(t)() 332 333 defer func(oldAdoptInterval time.Duration) { 334 jobs.DefaultAdoptInterval = oldAdoptInterval 335 }(jobs.DefaultAdoptInterval) 336 jobs.DefaultAdoptInterval = 100 * time.Millisecond 337 338 var allowRequest chan struct{} 339 340 var serverArgs base.TestServerArgs 341 params := base.TestClusterArgs{ServerArgs: serverArgs} 342 params.ServerArgs.Knobs.Store = &kvserver.StoreTestingKnobs{ 343 TestingRequestFilter: createStatsRequestFilter(&allowRequest), 344 } 345 346 ctx := context.Background() 347 const nodes = 1 348 tc := testcluster.StartTestCluster(t, nodes, params) 349 defer tc.Stopper().Stop(ctx) 350 conn := tc.Conns[0] 351 sqlDB := sqlutils.MakeSQLRunner(conn) 352 353 sqlDB.Exec(t, `CREATE DATABASE d`) 354 sqlDB.Exec(t, `CREATE TABLE d.t (x INT PRIMARY KEY)`) 355 sqlDB.Exec(t, `INSERT INTO d.t SELECT generate_series(1,1000)`) 356 357 // Start a CREATE STATISTICS run and wait until it's done one scan. 358 allowRequest = make(chan struct{}) 359 errCh := make(chan error) 360 go func() { 361 _, err := conn.Exec(`CREATE STATISTICS s1 FROM d.t`) 362 errCh <- err 363 }() 364 select { 365 case allowRequest <- struct{}{}: 366 case err := <-errCh: 367 t.Fatal(err) 368 } 369 370 autoStatsRunShouldFail := func() { 371 expectErrCh := make(chan error, 1) 372 go func() { 373 _, err := conn.Exec(`CREATE STATISTICS __auto__ FROM d.t`) 374 expectErrCh <- err 375 }() 376 select { 377 case err := <-expectErrCh: 378 expected := "another CREATE STATISTICS job is already running" 379 if !testutils.IsError(err, expected) { 380 t.Fatalf("expected '%s' error, but got %v", expected, err) 381 } 382 case <-time.After(time.Second): 383 panic("CREATE STATISTICS job which was expected to fail, timed out instead") 384 } 385 } 386 387 // Attempt to start an automatic stats run. It should fail. 388 autoStatsRunShouldFail() 389 390 // PAUSE JOB does not bloack until the job is paused but only requests it. 391 // Wait until the job is set to paused. 392 var jobID int64 393 sqlDB.QueryRow(t, `SELECT id FROM system.jobs ORDER BY created DESC LIMIT 1`).Scan(&jobID) 394 opts := retry.Options{ 395 InitialBackoff: 1 * time.Millisecond, 396 MaxBackoff: time.Second, 397 Multiplier: 2, 398 } 399 if err := retry.WithMaxAttempts(context.Background(), opts, 10, func() error { 400 _, err := sqlDB.DB.ExecContext(context.Background(), `PAUSE JOB $1`, jobID) 401 if err != nil { 402 t.Fatal(err) 403 } 404 var status string 405 sqlDB.QueryRow(t, `SELECT status FROM system.jobs WHERE id = $1 LIMIT 1`, jobID).Scan(&status) 406 if status != "paused" { 407 return errors.New("could not pause job") 408 } 409 return err 410 }); err != nil { 411 t.Fatal(err) 412 } 413 414 // Starting another automatic stats run should still fail. 415 autoStatsRunShouldFail() 416 417 // Attempt to start a regular stats run. It should succeed. 418 errCh2 := make(chan error) 419 go func() { 420 _, err := conn.Exec(`CREATE STATISTICS s2 FROM d.t`) 421 errCh2 <- err 422 }() 423 select { 424 case allowRequest <- struct{}{}: 425 case err := <-errCh: 426 t.Fatal(err) 427 case err := <-errCh2: 428 t.Fatal(err) 429 } 430 close(allowRequest) 431 432 // Verify that the second job completed successfully. 433 if err := <-errCh2; err != nil { 434 t.Fatalf("create stats job should have completed: %s", err) 435 } 436 437 // Verify that the first job completed successfully. 438 sqlDB.Exec(t, fmt.Sprintf("RESUME JOB %d", jobID)) 439 jobutils.WaitForJob(t, sqlDB, jobID) 440 <-errCh 441 } 442 443 func TestDeleteFailedJob(t *testing.T) { 444 defer leaktest.AfterTest(t)() 445 446 defer func(oldAdoptInterval time.Duration) { 447 jobs.DefaultAdoptInterval = oldAdoptInterval 448 }(jobs.DefaultAdoptInterval) 449 jobs.DefaultAdoptInterval = 100 * time.Millisecond 450 451 ctx := context.Background() 452 tc := testcluster.StartTestCluster(t, 1, base.TestClusterArgs{}) 453 defer tc.Stopper().Stop(ctx) 454 conn := tc.Conns[0] 455 sqlDB := sqlutils.MakeSQLRunner(conn) 456 457 sqlDB.Exec(t, `SET CLUSTER SETTING sql.stats.automatic_collection.enabled = false`) 458 sqlDB.Exec(t, `CREATE DATABASE d`) 459 sqlDB.Exec(t, `CREATE TABLE d.t (x INT PRIMARY KEY)`) 460 sqlDB.Exec(t, `CREATE TABLE d.u (x INT PRIMARY KEY)`) 461 sqlDB.Exec(t, `INSERT INTO d.t SELECT generate_series(1,1000)`) 462 sqlDB.Exec(t, `INSERT INTO d.u SELECT generate_series(1,1000)`) 463 464 // Start two CREATE STATISTICS runs at once. 465 errCh1 := make(chan error) 466 go func() { 467 _, err := conn.Exec(`CREATE STATISTICS __auto__ FROM d.t`) 468 errCh1 <- err 469 }() 470 errCh2 := make(chan error) 471 go func() { 472 _, err := conn.Exec(`CREATE STATISTICS __auto__ FROM d.u`) 473 errCh2 <- err 474 }() 475 476 err1 := <-errCh1 477 err2 := <-errCh2 478 479 // At least one of the jobs should have succeeded. 480 if err1 != nil && err2 != nil { 481 t.Fatalf("one job should have succeeded but both failed. err1:%v, err2:%v", err1, err2) 482 } 483 484 // Check that if one of the jobs failed, it was deleted and doesn't show up in 485 // SHOW AUTOMATIC JOBS. 486 // Note: if this test fails, it will likely show up by using stressrace. 487 if res := sqlDB.QueryStr(t, 488 `SELECT statement, status, error FROM [SHOW AUTOMATIC JOBS] WHERE status = $1`, 489 jobs.StatusFailed, 490 ); len(res) != 0 { 491 t.Fatalf("job should have been deleted but found: %v", res) 492 } 493 } 494 495 // TestCreateStatsProgress tests that progress reporting works correctly 496 // for the CREATE STATISTICS job. 497 func TestCreateStatsProgress(t *testing.T) { 498 defer leaktest.AfterTest(t)() 499 500 defer func(oldProgressInterval time.Duration) { 501 rowexec.SampleAggregatorProgressInterval = oldProgressInterval 502 }(rowexec.SampleAggregatorProgressInterval) 503 rowexec.SampleAggregatorProgressInterval = time.Nanosecond 504 505 defer func(oldProgressInterval int) { 506 rowexec.SamplerProgressInterval = oldProgressInterval 507 }(rowexec.SamplerProgressInterval) 508 rowexec.SamplerProgressInterval = 10 509 510 resetKVBatchSize := row.TestingSetKVBatchSize(10) 511 defer resetKVBatchSize() 512 513 var allowRequest chan struct{} 514 var serverArgs base.TestServerArgs 515 params := base.TestClusterArgs{ServerArgs: serverArgs} 516 params.ServerArgs.Knobs.Store = &kvserver.StoreTestingKnobs{ 517 TestingRequestFilter: createStatsRequestFilter(&allowRequest), 518 } 519 520 ctx := context.Background() 521 const nodes = 1 522 tc := testcluster.StartTestCluster(t, nodes, params) 523 defer tc.Stopper().Stop(ctx) 524 conn := tc.Conns[0] 525 sqlDB := sqlutils.MakeSQLRunner(conn) 526 527 sqlDB.Exec(t, `SET CLUSTER SETTING sql.stats.automatic_collection.enabled = false`) 528 sqlDB.Exec(t, `CREATE DATABASE d`) 529 sqlDB.Exec(t, `CREATE TABLE d.t (i INT8 PRIMARY KEY)`) 530 sqlDB.Exec(t, `INSERT INTO d.t SELECT generate_series(1,1000)`) 531 532 getFractionCompleted := func(jobID int64) float32 { 533 var progress *jobspb.Progress 534 testutils.SucceedsSoon(t, func() error { 535 progress = jobutils.GetJobProgress(t, sqlDB, jobID) 536 if progress.Progress == nil { 537 return errors.Errorf("progress is nil. jobID: %d", jobID) 538 } 539 return nil 540 }) 541 return progress.Progress.(*jobspb.Progress_FractionCompleted).FractionCompleted 542 } 543 544 const query = `CREATE STATISTICS s1 FROM d.t` 545 546 // Start a CREATE STATISTICS run and wait until it has scanned part of the 547 // table. 548 allowRequest = make(chan struct{}) 549 errCh := make(chan error) 550 go func() { 551 _, err := conn.Exec(query) 552 errCh <- err 553 }() 554 // Ten iterations here allows us to read some of the rows but not all. 555 for i := 0; i < 10; i++ { 556 select { 557 case allowRequest <- struct{}{}: 558 case err := <-errCh: 559 t.Fatal(err) 560 } 561 } 562 563 // Fetch the new job ID since we know it's running now. 564 jobID := jobutils.GetLastJobID(t, sqlDB) 565 566 // Ensure that 0 progress has been recorded since there are no existing 567 // stats available to estimate progress. 568 fractionCompleted := getFractionCompleted(jobID) 569 if fractionCompleted != 0 { 570 t.Fatalf( 571 "create stats should not have recorded progress, but progress is %f", 572 fractionCompleted, 573 ) 574 } 575 576 // Allow the job to complete and verify that the client didn't see anything 577 // amiss. 578 close(allowRequest) 579 if err := <-errCh; err != nil { 580 t.Fatalf("create stats job should have completed: %s", err) 581 } 582 583 // Verify that full progress is now recorded. 584 fractionCompleted = getFractionCompleted(jobID) 585 if fractionCompleted != 1 { 586 t.Fatalf( 587 "create stats should have recorded full progress, but progress is %f", 588 fractionCompleted, 589 ) 590 } 591 592 // Start another CREATE STATISTICS run and wait until it has scanned part of 593 // the table. 594 allowRequest = make(chan struct{}) 595 go func() { 596 _, err := conn.Exec(query) 597 errCh <- err 598 }() 599 // Ten iterations here allows us to read some of the rows but not all. 600 for i := 0; i < 10; i++ { 601 select { 602 case allowRequest <- struct{}{}: 603 case err := <-errCh: 604 t.Fatal(err) 605 } 606 } 607 608 // Fetch the new job ID since we know it's running now. 609 jobID = jobutils.GetLastJobID(t, sqlDB) 610 611 // Ensure that partial progress has been recorded since there are existing 612 // stats available. 613 fractionCompleted = getFractionCompleted(jobID) 614 if fractionCompleted <= 0 || fractionCompleted > 0.99 { 615 t.Fatalf( 616 "create stats should have recorded partial progress, but progress is %f", 617 fractionCompleted, 618 ) 619 } 620 621 // Allow the job to complete and verify that the client didn't see anything 622 // amiss. 623 close(allowRequest) 624 if err := <-errCh; err != nil { 625 t.Fatalf("create stats job should have completed: %s", err) 626 } 627 628 // Verify that full progress is now recorded. 629 fractionCompleted = getFractionCompleted(jobID) 630 if fractionCompleted != 1 { 631 t.Fatalf( 632 "create stats should have recorded full progress, but progress is %f", 633 fractionCompleted, 634 ) 635 } 636 } 637 638 func TestCreateStatsAsOfTime(t *testing.T) { 639 defer leaktest.AfterTest(t)() 640 641 ctx := context.Background() 642 tc := testcluster.StartTestCluster(t, 1, base.TestClusterArgs{}) 643 defer tc.Stopper().Stop(ctx) 644 sqlDB := sqlutils.MakeSQLRunner(tc.Conns[0]) 645 sqlDB.Exec(t, `CREATE DATABASE d`) 646 sqlDB.Exec(t, `CREATE TABLE d.t (x INT PRIMARY KEY)`) 647 648 var ts1 []uint8 649 sqlDB.QueryRow(t, ` 650 INSERT INTO d.t VALUES (1) 651 RETURNING cluster_logical_timestamp(); 652 `).Scan(&ts1) 653 654 sqlDB.Exec(t, `INSERT INTO d.t VALUES (2)`) 655 656 sqlDB.Exec(t, fmt.Sprintf("CREATE STATISTICS s FROM d.t AS OF SYSTEM TIME %s", string(ts1))) 657 658 // Check that we only see the first row, not the second. 659 sqlDB.CheckQueryResults(t, 660 `SELECT statistics_name, column_names, row_count FROM [SHOW STATISTICS FOR TABLE d.t]`, 661 [][]string{ 662 {"s", "{x}", "1"}, 663 }) 664 } 665 666 // Create a blocking request filter for the actions related 667 // to CREATE STATISTICS, i.e. Scanning a user table. See discussion 668 // on jobutils.RunJob for where this might be useful. 669 func createStatsRequestFilter(allowProgressIota *chan struct{}) kvserverbase.ReplicaRequestFilter { 670 return func(_ context.Context, ba roachpb.BatchRequest) *roachpb.Error { 671 if req, ok := ba.GetArg(roachpb.Scan); ok { 672 _, tableID, _ := encoding.DecodeUvarintAscending(req.(*roachpb.ScanRequest).Key) 673 // Ensure that the tableID is within the expected range for a table, 674 // but is not a system table. 675 if tableID > 0 && tableID < 100 && !sqlbase.IsReservedID(sqlbase.ID(tableID)) { 676 // Read from the channel twice to allow jobutils.RunJob to complete 677 // even though there is only one ScanRequest. 678 <-*allowProgressIota 679 <-*allowProgressIota 680 } 681 } 682 return nil 683 } 684 }