github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/ccl/workloadccl/allccl/all_test.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Licensed as a CockroachDB Enterprise file under the Cockroach Community
     4  // License (the "License"); you may not use this file except in compliance with
     5  // the License. You may obtain a copy of the License at
     6  //
     7  //     https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt
     8  
     9  package allccl
    10  
    11  import (
    12  	"context"
    13  	"encoding/binary"
    14  	"hash"
    15  	"hash/fnv"
    16  	"math"
    17  	"testing"
    18  
    19  	"github.com/cockroachdb/cockroach/pkg/base"
    20  	_ "github.com/cockroachdb/cockroach/pkg/ccl"
    21  	"github.com/cockroachdb/cockroach/pkg/ccl/workloadccl"
    22  	"github.com/cockroachdb/cockroach/pkg/col/coldata"
    23  	"github.com/cockroachdb/cockroach/pkg/sql/types"
    24  	"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
    25  	"github.com/cockroachdb/cockroach/pkg/testutils/sqlutils"
    26  	"github.com/cockroachdb/cockroach/pkg/util"
    27  	"github.com/cockroachdb/cockroach/pkg/util/bufalloc"
    28  	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
    29  	"github.com/cockroachdb/cockroach/pkg/workload"
    30  	"github.com/cockroachdb/cockroach/pkg/workload/workloadsql"
    31  	"github.com/cockroachdb/errors"
    32  	"github.com/stretchr/testify/require"
    33  )
    34  
    35  func bigInitialData(meta workload.Meta) bool {
    36  	switch meta.Name {
    37  	case `tpcc`, `tpch`, `tpcds`:
    38  		return true
    39  	default:
    40  		return false
    41  	}
    42  }
    43  
    44  func TestAllRegisteredImportFixture(t *testing.T) {
    45  	defer leaktest.AfterTest(t)()
    46  
    47  	sqlMemoryPoolSize := int64(1000 << 20) // 1GiB
    48  
    49  	for _, meta := range workload.Registered() {
    50  		meta := meta
    51  		gen := meta.New()
    52  		hasInitialData := true
    53  		for _, table := range gen.Tables() {
    54  			if table.InitialRows.FillBatch == nil {
    55  				hasInitialData = false
    56  				break
    57  			}
    58  		}
    59  		if !hasInitialData {
    60  			continue
    61  		}
    62  
    63  		// This test is big enough that it causes timeout issues under race, so only
    64  		// run one workload. Doing any more than this doesn't get us enough to be
    65  		// worth the hassle.
    66  		if util.RaceEnabled && meta.Name != `bank` {
    67  			continue
    68  		}
    69  
    70  		switch meta.Name {
    71  		case `startrek`, `roachmart`, `interleavedpartitioned`:
    72  			// These don't work with IMPORT.
    73  			continue
    74  		case `tpch`:
    75  			// TODO(dan): Implement a timmed down version of TPCH to keep the test
    76  			// runtime down.
    77  			continue
    78  		}
    79  
    80  		t.Run(meta.Name, func(t *testing.T) {
    81  			if bigInitialData(meta) && testing.Short() {
    82  				t.Skipf(`%s loads a lot of data`, meta.Name)
    83  			}
    84  
    85  			ctx := context.Background()
    86  			s, db, _ := serverutils.StartServer(t, base.TestServerArgs{
    87  				UseDatabase:       "d",
    88  				SQLMemoryPoolSize: sqlMemoryPoolSize,
    89  			})
    90  			defer s.Stopper().Stop(ctx)
    91  			sqlutils.MakeSQLRunner(db).Exec(t, `CREATE DATABASE d`)
    92  
    93  			l := workloadccl.ImportDataLoader{}
    94  			if _, err := workloadsql.Setup(ctx, db, gen, l); err != nil {
    95  				t.Fatalf(`%+v`, err)
    96  			}
    97  
    98  			// Run the consistency check if this workload has one.
    99  			if h, ok := gen.(workload.Hookser); ok {
   100  				if checkConsistencyFn := h.Hooks().CheckConsistency; checkConsistencyFn != nil {
   101  					if err := checkConsistencyFn(ctx, db); err != nil {
   102  						t.Errorf(`%+v`, err)
   103  					}
   104  				}
   105  			}
   106  		})
   107  	}
   108  }
   109  
   110  func TestAllRegisteredSetup(t *testing.T) {
   111  	defer leaktest.AfterTest(t)()
   112  
   113  	for _, meta := range workload.Registered() {
   114  		if bigInitialData(meta) {
   115  			continue
   116  		}
   117  
   118  		// This test is big enough that it causes timeout issues under race, so only
   119  		// run one workload. Doing any more than this doesn't get us enough to be
   120  		// worth the hassle.
   121  		if util.RaceEnabled && meta.Name != `bank` {
   122  			continue
   123  		}
   124  
   125  		gen := meta.New()
   126  		switch meta.Name {
   127  		case `roachmart`:
   128  			// TODO(dan): It'd be nice to test this with the default flags. For now,
   129  			// this is better than nothing.
   130  			flags := gen.(workload.Flagser).Flags()
   131  			if err := flags.Parse([]string{
   132  				`--users=10`, `--orders=100`, `--partition=false`,
   133  			}); err != nil {
   134  				t.Fatal(err)
   135  			}
   136  		case `interleavedpartitioned`:
   137  			// This require a specific node locality setup
   138  			continue
   139  		}
   140  
   141  		t.Run(meta.Name, func(t *testing.T) {
   142  			ctx := context.Background()
   143  			s, db, _ := serverutils.StartServer(t, base.TestServerArgs{
   144  				UseDatabase: "d",
   145  			})
   146  			defer s.Stopper().Stop(ctx)
   147  			sqlutils.MakeSQLRunner(db).Exec(t, `CREATE DATABASE d`)
   148  			sqlutils.MakeSQLRunner(db).Exec(t, `SET CLUSTER SETTING kv.range_merge.queue_enabled = false`)
   149  
   150  			var l workloadsql.InsertsDataLoader
   151  			if _, err := workloadsql.Setup(ctx, db, gen, l); err != nil {
   152  				t.Fatalf(`%+v`, err)
   153  			}
   154  
   155  			// Run the consistency check if this workload has one.
   156  			if h, ok := gen.(workload.Hookser); ok {
   157  				if checkConsistencyFn := h.Hooks().CheckConsistency; checkConsistencyFn != nil {
   158  					if err := checkConsistencyFn(ctx, db); err != nil {
   159  						t.Errorf(`%+v`, err)
   160  					}
   161  				}
   162  			}
   163  		})
   164  	}
   165  }
   166  
   167  func TestConsistentSchema(t *testing.T) {
   168  	defer leaktest.AfterTest(t)()
   169  	// Test that the table schemas are consistent when the workload is created
   170  	// multiple times with the same seed.
   171  
   172  	for _, meta := range workload.Registered() {
   173  		t.Run(meta.Name, func(t *testing.T) {
   174  			tables1 := meta.New().Tables()
   175  			tables2 := meta.New().Tables()
   176  			for i := range tables1 {
   177  				name := tables1[i].Name
   178  				schema1 := tables1[i].Schema
   179  				schema2 := tables2[i].Schema
   180  				if schema1 != schema2 {
   181  					t.Errorf("schema mismatch for table %s: %s, %s", name, schema1, schema2)
   182  				}
   183  			}
   184  		})
   185  	}
   186  }
   187  
   188  func hashTableInitialData(
   189  	h hash.Hash, data workload.BatchedTuples, a *bufalloc.ByteAllocator,
   190  ) error {
   191  	var scratch [8]byte
   192  	b := coldata.NewMemBatchWithSize(nil /* types */, 0 /* size */, coldata.StandardColumnFactory)
   193  	for batchIdx := 0; batchIdx < data.NumBatches; batchIdx++ {
   194  		*a = (*a)[:0]
   195  		data.FillBatch(batchIdx, b, a)
   196  		for _, col := range b.ColVecs() {
   197  			switch t := col.Type(); col.CanonicalTypeFamily() {
   198  			case types.BoolFamily:
   199  				for _, x := range col.Bool()[:b.Length()] {
   200  					if x {
   201  						scratch[0] = 1
   202  					} else {
   203  						scratch[0] = 0
   204  					}
   205  					_, _ = h.Write(scratch[:1])
   206  				}
   207  			case types.IntFamily:
   208  				switch t.Width() {
   209  				case 0, 64:
   210  					for _, x := range col.Int64()[:b.Length()] {
   211  						binary.LittleEndian.PutUint64(scratch[:8], uint64(x))
   212  						_, _ = h.Write(scratch[:8])
   213  					}
   214  				case 16:
   215  					for _, x := range col.Int16()[:b.Length()] {
   216  						binary.LittleEndian.PutUint16(scratch[:2], uint16(x))
   217  						_, _ = h.Write(scratch[:2])
   218  					}
   219  				}
   220  			case types.FloatFamily:
   221  				for _, x := range col.Float64()[:b.Length()] {
   222  					bits := math.Float64bits(x)
   223  					binary.LittleEndian.PutUint64(scratch[:8], bits)
   224  					_, _ = h.Write(scratch[:8])
   225  				}
   226  			case types.BytesFamily:
   227  				colBytes := col.Bytes()
   228  				for i := 0; i < b.Length(); i++ {
   229  					_, _ = h.Write(colBytes.Get(i))
   230  				}
   231  			default:
   232  				return errors.Errorf(`unhandled type %s`, col.Type())
   233  			}
   234  		}
   235  	}
   236  	return nil
   237  }
   238  
   239  func TestDeterministicInitialData(t *testing.T) {
   240  	defer leaktest.AfterTest(t)()
   241  
   242  	// There are other tests that run initial data generation under race, so we
   243  	// don't get anything from running this one under race as well.
   244  	if util.RaceEnabled {
   245  		t.Skip(`uninteresting under race`)
   246  	}
   247  
   248  	// Hardcode goldens for the fingerprint of the initial data of generators with
   249  	// default flags. This lets us opt in generators known to be deterministic and
   250  	// also protects against initialized-once global state (which tpcc did have at
   251  	// one point).
   252  	//
   253  	// TODO(dan): We're starting to accumulate these various lists, bigInitialData
   254  	// is another. Consider moving them to be properties on the workload.Meta.
   255  	fingerprintGoldens := map[string]uint64{
   256  		`bank`:       0x7b4d519ed8bd07ce,
   257  		`bulkingest`: 0xcf3e4028ac084aea,
   258  		`indexes`:    0xcbf29ce484222325,
   259  		`intro`:      0x81c6a8cfd9c3452a,
   260  		`json`:       0xcbf29ce484222325,
   261  		`ledger`:     0xebe27d872d980271,
   262  		`movr`:       0x4c0da49085e0bc5c,
   263  		`queue`:      0xcbf29ce484222325,
   264  		`rand`:       0xcbf29ce484222325,
   265  		`roachmart`:  0xda5e73423dbdb2d9,
   266  		`sqlsmith`:   0xcbf29ce484222325,
   267  		`startrek`:   0xa0249fbdf612734c,
   268  		`tpcc`:       0xab32e4f5e899eb2f,
   269  		`tpch`:       0xdd952207e22aa577,
   270  		`ycsb`:       0x85dd34d8c07fd808,
   271  	}
   272  
   273  	var a bufalloc.ByteAllocator
   274  	for _, meta := range workload.Registered() {
   275  		fingerprintGolden, ok := fingerprintGoldens[meta.Name]
   276  		if !ok {
   277  			// TODO(dan): It'd be nice to eventually require that all registered
   278  			// workloads are deterministic, but given that tpcc was a legitimate
   279  			// exception for a while (for performance reasons), it's not clear right
   280  			// now that we should be strict about this.
   281  			continue
   282  		}
   283  		t.Run(meta.Name, func(t *testing.T) {
   284  			if bigInitialData(meta) && testing.Short() {
   285  				t.Skipf(`%s involves a lot of data`, meta.Name)
   286  			}
   287  
   288  			h := fnv.New64()
   289  			tables := meta.New().Tables()
   290  			for _, table := range tables {
   291  				require.NoError(t, hashTableInitialData(h, table.InitialRows, &a))
   292  			}
   293  			require.Equal(t, fingerprintGolden, h.Sum64())
   294  		})
   295  	}
   296  }