github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/testutils/jobutils/jobs_verification.go (about)

     1  // Copyright 2017 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package jobutils
    12  
    13  import (
    14  	"context"
    15  	gosql "database/sql"
    16  	"fmt"
    17  	"reflect"
    18  	"sort"
    19  	"strconv"
    20  	"strings"
    21  	"testing"
    22  	"time"
    23  
    24  	"github.com/cockroachdb/cockroach/pkg/jobs"
    25  	"github.com/cockroachdb/cockroach/pkg/jobs/jobspb"
    26  	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverbase"
    27  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    28  	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
    29  	"github.com/cockroachdb/cockroach/pkg/testutils/sqlutils"
    30  	"github.com/cockroachdb/cockroach/pkg/util/protoutil"
    31  	"github.com/cockroachdb/cockroach/pkg/util/retry"
    32  	"github.com/cockroachdb/errors"
    33  	"github.com/kr/pretty"
    34  	"github.com/lib/pq"
    35  )
    36  
    37  // WaitForJob waits for the specified job ID to terminate.
    38  func WaitForJob(t testing.TB, db *sqlutils.SQLRunner, jobID int64) {
    39  	t.Helper()
    40  	if err := retry.ForDuration(time.Minute*2, func() error {
    41  		var status string
    42  		var payloadBytes []byte
    43  		db.QueryRow(
    44  			t, `SELECT status, payload FROM system.jobs WHERE id = $1`, jobID,
    45  		).Scan(&status, &payloadBytes)
    46  		if jobs.Status(status) == jobs.StatusFailed {
    47  			payload := &jobspb.Payload{}
    48  			if err := protoutil.Unmarshal(payloadBytes, payload); err == nil {
    49  				t.Fatalf("job failed: %s", payload.Error)
    50  			}
    51  			t.Fatalf("job failed")
    52  		}
    53  		if e, a := jobs.StatusSucceeded, jobs.Status(status); e != a {
    54  			return errors.Errorf("expected job status %s, but got %s", e, a)
    55  		}
    56  		return nil
    57  	}); err != nil {
    58  		t.Fatal(err)
    59  	}
    60  }
    61  
    62  // RunJob runs the provided job control statement, intializing, notifying and
    63  // closing the chan at the passed pointer (see below for why) and returning the
    64  // jobID and error result. PAUSE JOB and CANCEL JOB are racy in that it's hard
    65  // to guarantee that the job is still running when executing a PAUSE or
    66  // CANCEL--or that the job has even started running. To synchronize, we can
    67  // install a store response filter which does a blocking receive for one of the
    68  // responses used by our job (for example, Export for a BACKUP). Later, when we
    69  // want to guarantee the job is in progress, we do exactly one blocking send.
    70  // When this send completes, we know the job has started, as we've seen one
    71  // expected response. We also know the job has not finished, because we're
    72  // blocking all future responses until we close the channel, and our operation
    73  // is large enough that it will generate more than one of the expected response.
    74  func RunJob(
    75  	t *testing.T,
    76  	db *sqlutils.SQLRunner,
    77  	allowProgressIota *chan struct{},
    78  	ops []string,
    79  	query string,
    80  	args ...interface{},
    81  ) (int64, error) {
    82  	*allowProgressIota = make(chan struct{})
    83  	errCh := make(chan error)
    84  	go func() {
    85  		_, err := db.DB.ExecContext(context.TODO(), query, args...)
    86  		errCh <- err
    87  	}()
    88  	select {
    89  	case *allowProgressIota <- struct{}{}:
    90  	case err := <-errCh:
    91  		return 0, errors.Wrapf(err, "query returned before expected: %s", query)
    92  	}
    93  	var jobID int64
    94  	db.QueryRow(t, `SELECT id FROM system.jobs ORDER BY created DESC LIMIT 1`).Scan(&jobID)
    95  	for _, op := range ops {
    96  		db.Exec(t, fmt.Sprintf("%s JOB %d", op, jobID))
    97  		*allowProgressIota <- struct{}{}
    98  	}
    99  	close(*allowProgressIota)
   100  	return jobID, <-errCh
   101  }
   102  
   103  // BulkOpResponseFilter creates a blocking response filter for the responses
   104  // related to bulk IO/backup/restore/import: Export, Import and AddSSTable. See
   105  // discussion on RunJob for where this might be useful.
   106  func BulkOpResponseFilter(allowProgressIota *chan struct{}) kvserverbase.ReplicaResponseFilter {
   107  	return func(_ context.Context, ba roachpb.BatchRequest, br *roachpb.BatchResponse) *roachpb.Error {
   108  		for _, ru := range br.Responses {
   109  			switch ru.GetInner().(type) {
   110  			case *roachpb.ExportResponse, *roachpb.ImportResponse, *roachpb.AddSSTableResponse:
   111  				<-*allowProgressIota
   112  			}
   113  		}
   114  		return nil
   115  	}
   116  }
   117  
   118  func verifySystemJob(
   119  	t testing.TB,
   120  	db *sqlutils.SQLRunner,
   121  	offset int,
   122  	filterType jobspb.Type,
   123  	expectedStatus string,
   124  	expectedRunningStatus string,
   125  	expected jobs.Record,
   126  ) error {
   127  	var actual jobs.Record
   128  	var rawDescriptorIDs pq.Int64Array
   129  	var statusString string
   130  	var runningStatus gosql.NullString
   131  	var runningStatusString string
   132  	// We have to query for the nth job created rather than filtering by ID,
   133  	// because job-generating SQL queries (e.g. BACKUP) do not currently return
   134  	// the job ID.
   135  	db.QueryRow(t, `
   136  		SELECT description, user_name, descriptor_ids, status, running_status
   137  		FROM crdb_internal.jobs WHERE job_type = $1 ORDER BY created LIMIT 1 OFFSET $2`,
   138  		filterType.String(),
   139  		offset,
   140  	).Scan(
   141  		&actual.Description, &actual.Username, &rawDescriptorIDs,
   142  		&statusString, &runningStatus,
   143  	)
   144  	if runningStatus.Valid {
   145  		runningStatusString = runningStatus.String
   146  	}
   147  
   148  	for _, id := range rawDescriptorIDs {
   149  		actual.DescriptorIDs = append(actual.DescriptorIDs, sqlbase.ID(id))
   150  	}
   151  	sort.Sort(actual.DescriptorIDs)
   152  	sort.Sort(expected.DescriptorIDs)
   153  	expected.Details = nil
   154  	if e, a := expected, actual; !reflect.DeepEqual(e, a) {
   155  		return errors.Errorf("job %d did not match:\n%s",
   156  			offset, strings.Join(pretty.Diff(e, a), "\n"))
   157  	}
   158  
   159  	if expectedStatus != statusString {
   160  		return errors.Errorf("job %d: expected status %v, got %v", offset, expectedStatus, statusString)
   161  	}
   162  	if expectedRunningStatus != "" && expectedRunningStatus != runningStatusString {
   163  		return errors.Errorf("job %d: expected running status %v, got %v",
   164  			offset, expectedRunningStatus, runningStatusString)
   165  	}
   166  
   167  	return nil
   168  }
   169  
   170  // VerifyRunningSystemJob checks that job records are created as expected
   171  // and is marked as running.
   172  func VerifyRunningSystemJob(
   173  	t testing.TB,
   174  	db *sqlutils.SQLRunner,
   175  	offset int,
   176  	filterType jobspb.Type,
   177  	expectedRunningStatus jobs.RunningStatus,
   178  	expected jobs.Record,
   179  ) error {
   180  	return verifySystemJob(t, db, offset, filterType, "running", string(expectedRunningStatus), expected)
   181  }
   182  
   183  // VerifySystemJob checks that job records are created as expected.
   184  func VerifySystemJob(
   185  	t testing.TB,
   186  	db *sqlutils.SQLRunner,
   187  	offset int,
   188  	filterType jobspb.Type,
   189  	expectedStatus jobs.Status,
   190  	expected jobs.Record,
   191  ) error {
   192  	return verifySystemJob(t, db, offset, filterType, string(expectedStatus), "", expected)
   193  }
   194  
   195  // GetJobFormatVersion returns the format version of a schema change job.
   196  // Will fail the test if the jobID does not reference a schema change job.
   197  func GetJobFormatVersion(
   198  	t testing.TB, db *sqlutils.SQLRunner,
   199  ) jobspb.SchemaChangeDetailsFormatVersion {
   200  	rows := db.QueryStr(t, "SELECT * FROM [SHOW JOBS] WHERE job_type = 'SCHEMA CHANGE' AND description <> 'updating privileges' ORDER BY created DESC LIMIT 1")
   201  	if len(rows) != 1 {
   202  		t.Fatal("expected exactly one row when checking the format version")
   203  	}
   204  	jobID, err := strconv.Atoi(rows[0][0])
   205  	if err != nil {
   206  		t.Fatal(err)
   207  	}
   208  
   209  	var payloadBytes []byte
   210  	db.QueryRow(t, `SELECT payload FROM system.jobs WHERE id = $1`, jobID).Scan(&payloadBytes)
   211  
   212  	payload := &jobspb.Payload{}
   213  	if err := protoutil.Unmarshal(payloadBytes, payload); err != nil {
   214  		t.Fatal(err)
   215  	}
   216  	// Lease is always nil in 19.2.
   217  	payload.Lease = nil
   218  
   219  	details := payload.GetSchemaChange()
   220  	return details.FormatVersion
   221  }
   222  
   223  // GetJobID gets a particular job's ID.
   224  func GetJobID(t testing.TB, db *sqlutils.SQLRunner, offset int) int64 {
   225  	var jobID int64
   226  	db.QueryRow(t, `
   227  	SELECT job_id FROM crdb_internal.jobs ORDER BY created LIMIT 1 OFFSET $1`, offset,
   228  	).Scan(&jobID)
   229  	return jobID
   230  }
   231  
   232  // GetLastJobID gets the most recent job's ID.
   233  func GetLastJobID(t testing.TB, db *sqlutils.SQLRunner) int64 {
   234  	var jobID int64
   235  	db.QueryRow(
   236  		t, `SELECT id FROM system.jobs ORDER BY created DESC LIMIT 1`,
   237  	).Scan(&jobID)
   238  	return jobID
   239  }
   240  
   241  // GetJobProgress loads the Progress message associated with the job.
   242  func GetJobProgress(t *testing.T, db *sqlutils.SQLRunner, jobID int64) *jobspb.Progress {
   243  	ret := &jobspb.Progress{}
   244  	var buf []byte
   245  	db.QueryRow(t, `SELECT progress FROM system.jobs WHERE id = $1`, jobID).Scan(&buf)
   246  	if err := protoutil.Unmarshal(buf, ret); err != nil {
   247  		t.Fatal(err)
   248  	}
   249  	return ret
   250  }