github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/internal/sqlsmith/sqlsmith.go (about)

     1  // Copyright 2019 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package sqlsmith
    12  
    13  import (
    14  	gosql "database/sql"
    15  	"fmt"
    16  	"math/rand"
    17  	"net/http/httptest"
    18  	"regexp"
    19  	"strings"
    20  
    21  	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
    22  	"github.com/cockroachdb/cockroach/pkg/sql/types"
    23  	"github.com/cockroachdb/cockroach/pkg/util/syncutil"
    24  )
    25  
    26  // sqlsmith-go
    27  //
    28  // sqlsmith-go is a random SQL query generator, based off of sqlsmith:
    29  //
    30  //   https://github.com/anse1/sqlsmith
    31  //
    32  // You can think of it as walking a randomly generated AST and materializing
    33  // that AST as it goes, which it then feeds into Cockroach with the hopes of
    34  // finding panics.
    35  //
    36  // However, naively generating such an AST will only find certain kinds of
    37  // panics: they're almost guaranteed not to pass semantic analysis, and so
    38  // any components of the system beyond that will probably not be tested.
    39  // To get around this, sqlsmith tracks scopes and types, very similar to
    40  // how the optbuilder works, to create ASTs which will likely pass
    41  // semantic analysis.
    42  //
    43  // It does this by building the tree top-down. Every level of the tree
    44  // requests input of a certain form. For instance, a SELECT will request
    45  // a list of projections which respect the scope that the SELECT introduces,
    46  // and a function call will request an input value of a particular type,
    47  // subject to the same scope it has. This raises a question: what if we
    48  // are unable to construct an expression meeting the restrictions requested
    49  // by the parent expression? Rather than do some fancy constraint solving
    50  // (which could be an interesting direction for this tool to go in the
    51  // future, but I've found to be difficult when I've tried in the past)
    52  // sqlsmith will simply try randomly to generate an expression, and once
    53  // it fails a certain number of times, it will retreat up the tree and
    54  // retry at a higher level.
    55  
    56  const retryCount = 20
    57  
    58  // Smither is a sqlsmith generator.
    59  type Smither struct {
    60  	rnd              *rand.Rand
    61  	db               *gosql.DB
    62  	lock             syncutil.RWMutex
    63  	tables           []*tableRef
    64  	columns          map[tree.TableName]map[tree.Name]*tree.ColumnTableDef
    65  	indexes          map[tree.TableName]map[tree.Name]*tree.CreateIndex
    66  	nameCounts       map[string]int
    67  	activeSavepoints []string
    68  
    69  	stmtWeights, alterWeights          []statementWeight
    70  	stmtSampler, alterSampler          *statementSampler
    71  	tableExprWeights                   []tableExprWeight
    72  	tableExprSampler                   *tableExprSampler
    73  	selectStmtWeights                  []selectStatementWeight
    74  	selectStmtSampler                  *selectStatementSampler
    75  	scalarExprWeights, boolExprWeights []scalarExprWeight
    76  	scalarExprSampler, boolExprSampler *scalarExprSampler
    77  
    78  	disableWith        bool
    79  	disableImpureFns   bool
    80  	disableLimits      bool
    81  	disableWindowFuncs bool
    82  	simpleDatums       bool
    83  	avoidConsts        bool
    84  	vectorizable       bool
    85  	outputSort         bool
    86  	postgres           bool
    87  	ignoreFNs          []*regexp.Regexp
    88  	complexity         float64
    89  
    90  	bulkSrv     *httptest.Server
    91  	bulkFiles   map[string][]byte
    92  	bulkBackups map[string]tree.TargetList
    93  	bulkExports []string
    94  }
    95  
    96  type (
    97  	statement       func(*Smither) (tree.Statement, bool)
    98  	tableExpr       func(s *Smither, refs colRefs, forJoin bool) (tree.TableExpr, colRefs, bool)
    99  	selectStatement func(s *Smither, desiredTypes []*types.T, refs colRefs, withTables tableRefs) (tree.SelectStatement, colRefs, bool)
   100  	scalarExpr      func(*Smither, Context, *types.T, colRefs) (expr tree.TypedExpr, ok bool)
   101  )
   102  
   103  // NewSmither creates a new Smither. db is used to populate existing tables
   104  // for use as column references. It can be nil to skip table population.
   105  func NewSmither(db *gosql.DB, rnd *rand.Rand, opts ...SmitherOption) (*Smither, error) {
   106  	s := &Smither{
   107  		rnd:        rnd,
   108  		db:         db,
   109  		nameCounts: map[string]int{},
   110  
   111  		stmtWeights:       allStatements,
   112  		alterWeights:      alters,
   113  		tableExprWeights:  allTableExprs,
   114  		selectStmtWeights: selectStmts,
   115  		scalarExprWeights: scalars,
   116  		boolExprWeights:   bools,
   117  
   118  		complexity: 0.2,
   119  	}
   120  	for _, opt := range opts {
   121  		opt.Apply(s)
   122  	}
   123  	s.stmtSampler = newWeightedStatementSampler(s.stmtWeights, rnd.Int63())
   124  	s.alterSampler = newWeightedStatementSampler(s.alterWeights, rnd.Int63())
   125  	s.tableExprSampler = newWeightedTableExprSampler(s.tableExprWeights, rnd.Int63())
   126  	s.selectStmtSampler = newWeightedSelectStatementSampler(s.selectStmtWeights, rnd.Int63())
   127  	s.scalarExprSampler = newWeightedScalarExprSampler(s.scalarExprWeights, rnd.Int63())
   128  	s.boolExprSampler = newWeightedScalarExprSampler(s.boolExprWeights, rnd.Int63())
   129  	s.enableBulkIO()
   130  	return s, s.ReloadSchemas()
   131  }
   132  
   133  // Close closes resources used by the Smither.
   134  func (s *Smither) Close() {
   135  	if s.bulkSrv != nil {
   136  		s.bulkSrv.Close()
   137  	}
   138  }
   139  
   140  var prettyCfg = func() tree.PrettyCfg {
   141  	cfg := tree.DefaultPrettyCfg()
   142  	cfg.LineWidth = 120
   143  	cfg.Simplify = false
   144  	return cfg
   145  }()
   146  
   147  // Generate returns a random SQL string.
   148  func (s *Smither) Generate() string {
   149  	i := 0
   150  	for {
   151  		stmt, ok := s.makeStmt()
   152  		if !ok {
   153  			i++
   154  			if i > 1000 {
   155  				panic("exhausted generation attempts")
   156  			}
   157  			continue
   158  		}
   159  		i = 0
   160  		return prettyCfg.Pretty(stmt)
   161  	}
   162  }
   163  
   164  // GenerateExpr returns a random SQL expression that does not depend on any
   165  // tables or columns.
   166  func (s *Smither) GenerateExpr() tree.TypedExpr {
   167  	return makeScalar(s, s.randScalarType(), nil)
   168  }
   169  
   170  func (s *Smither) name(prefix string) tree.Name {
   171  	s.lock.Lock()
   172  	s.nameCounts[prefix]++
   173  	count := s.nameCounts[prefix]
   174  	s.lock.Unlock()
   175  	return tree.Name(fmt.Sprintf("%s_%d", prefix, count))
   176  }
   177  
   178  // SmitherOption is an option for the Smither client.
   179  type SmitherOption interface {
   180  	Apply(*Smither)
   181  	String() string
   182  }
   183  
   184  func simpleOption(name string, apply func(s *Smither)) func() SmitherOption {
   185  	return func() SmitherOption {
   186  		return option{
   187  			name:  name,
   188  			apply: apply,
   189  		}
   190  	}
   191  }
   192  
   193  func multiOption(name string, opts ...SmitherOption) func() SmitherOption {
   194  	var sb strings.Builder
   195  	sb.WriteString(name)
   196  	sb.WriteString("(")
   197  	delim := ""
   198  	for _, opt := range opts {
   199  		sb.WriteString(delim)
   200  		delim = ", "
   201  		sb.WriteString(opt.String())
   202  	}
   203  	sb.WriteString(")")
   204  	return func() SmitherOption {
   205  		return option{
   206  			name: sb.String(),
   207  			apply: func(s *Smither) {
   208  				for _, opt := range opts {
   209  					opt.Apply(s)
   210  				}
   211  			},
   212  		}
   213  	}
   214  }
   215  
   216  type option struct {
   217  	name  string
   218  	apply func(s *Smither)
   219  }
   220  
   221  func (o option) String() string {
   222  	return o.name
   223  }
   224  
   225  func (o option) Apply(s *Smither) {
   226  	o.apply(s)
   227  }
   228  
   229  // DisableMutations causes the Smither to not emit statements that could
   230  // mutate any on-disk data.
   231  var DisableMutations = simpleOption("disable mutations", func(s *Smither) {
   232  	s.stmtWeights = nonMutatingStatements
   233  	s.tableExprWeights = nonMutatingTableExprs
   234  })
   235  
   236  // DisableDDLs causes the Smither to not emit statements that change table
   237  // schema (CREATE, DROP, ALTER, etc.)
   238  var DisableDDLs = simpleOption("disable DDLs", func(s *Smither) {
   239  	s.stmtWeights = []statementWeight{
   240  		{20, makeSelect},
   241  		{5, makeInsert},
   242  		{5, makeUpdate},
   243  		{1, makeDelete},
   244  		// If we don't have any DDL's, allow for use of savepoints and transactions.
   245  		{2, makeBegin},
   246  		{2, makeSavepoint},
   247  		{2, makeReleaseSavepoint},
   248  		{2, makeRollbackToSavepoint},
   249  		{2, makeCommit},
   250  		{2, makeRollback},
   251  	}
   252  })
   253  
   254  // OnlyNoDropDDLs causes the Smither to only emit DDLs, but won't ever drop
   255  // a table.
   256  var OnlyNoDropDDLs = simpleOption("only DDLs", func(s *Smither) {
   257  	s.stmtWeights = append([]statementWeight{
   258  		{1, makeBegin},
   259  		{2, makeRollback},
   260  		{6, makeCommit},
   261  	},
   262  		altersExistingTable...,
   263  	)
   264  })
   265  
   266  // DisableWith causes the Smither to not emit WITH clauses.
   267  var DisableWith = simpleOption("disable WITH", func(s *Smither) {
   268  	s.disableWith = true
   269  })
   270  
   271  // DisableImpureFns causes the Smither to disable impure functions.
   272  var DisableImpureFns = simpleOption("disable impure funcs", func(s *Smither) {
   273  	s.disableImpureFns = true
   274  })
   275  
   276  // DisableCRDBFns causes the Smither to disable crdb_internal functions.
   277  func DisableCRDBFns() SmitherOption {
   278  	return IgnoreFNs("^crdb_internal")
   279  }
   280  
   281  // SimpleDatums causes the Smither to emit simpler constant datums.
   282  var SimpleDatums = simpleOption("simple datums", func(s *Smither) {
   283  	s.simpleDatums = true
   284  })
   285  
   286  // IgnoreFNs causes the Smither to ignore functions that match the regex.
   287  func IgnoreFNs(regex string) SmitherOption {
   288  	r := regexp.MustCompile(regex)
   289  	return option{
   290  		name: fmt.Sprintf("ignore fns: %q", r.String()),
   291  		apply: func(s *Smither) {
   292  			s.ignoreFNs = append(s.ignoreFNs, r)
   293  		},
   294  	}
   295  }
   296  
   297  // DisableLimits causes the Smither to disable LIMIT clauses.
   298  var DisableLimits = simpleOption("disable LIMIT", func(s *Smither) {
   299  	s.disableLimits = true
   300  })
   301  
   302  // AvoidConsts causes the Smither to prefer column references over generating
   303  // constants.
   304  var AvoidConsts = simpleOption("avoid consts", func(s *Smither) {
   305  	s.avoidConsts = true
   306  })
   307  
   308  // DisableWindowFuncs disables window functions.
   309  var DisableWindowFuncs = simpleOption("disable window funcs", func(s *Smither) {
   310  	s.disableWindowFuncs = true
   311  })
   312  
   313  // Vectorizable causes the Smither to limit query generation to queries
   314  // supported by vectorized execution.
   315  var Vectorizable = multiOption(
   316  	"Vectorizable",
   317  	DisableMutations(),
   318  	DisableWith(),
   319  	DisableWindowFuncs(),
   320  	AvoidConsts(),
   321  	// This must be last so it can make the final changes to table
   322  	// exprs and statements.
   323  	simpleOption("vectorizable", func(s *Smither) {
   324  		s.vectorizable = true
   325  		s.stmtWeights = nonMutatingStatements
   326  		s.tableExprWeights = vectorizableTableExprs
   327  	})(),
   328  )
   329  
   330  // OutputSort adds a top-level ORDER BY on all columns.
   331  var OutputSort = simpleOption("output sort", func(s *Smither) {
   332  	s.outputSort = true
   333  })
   334  
   335  // CompareMode causes the Smither to generate statements that have
   336  // deterministic output.
   337  var CompareMode = multiOption(
   338  	"compare mode",
   339  	DisableMutations(),
   340  	DisableImpureFns(),
   341  	DisableCRDBFns(),
   342  	IgnoreFNs("^version"),
   343  	DisableLimits(),
   344  	OutputSort(),
   345  )
   346  
   347  // PostgresMode causes the Smither to generate statements that work identically
   348  // in Postgres and Cockroach.
   349  var PostgresMode = multiOption(
   350  	"postgres mode",
   351  	CompareMode(),
   352  	DisableWith(),
   353  	SimpleDatums(),
   354  	IgnoreFNs("^current_"),
   355  	simpleOption("postgres", func(s *Smither) {
   356  		s.postgres = true
   357  	})(),
   358  
   359  	// Some func impls differ from postgres, so skip them here.
   360  	// #41709
   361  	IgnoreFNs("^sha"),
   362  	// We use e'XX' instead of E'XX' for hex strings, so ignore these.
   363  	IgnoreFNs("^quote"),
   364  	// We have some differences here with empty string and "default"; skip until fixed.
   365  	IgnoreFNs("^pg_collation_for"),
   366  )