github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/internal/sqlsmith/sqlsmith.go (about) 1 // Copyright 2019 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package sqlsmith 12 13 import ( 14 gosql "database/sql" 15 "fmt" 16 "math/rand" 17 "net/http/httptest" 18 "regexp" 19 "strings" 20 21 "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" 22 "github.com/cockroachdb/cockroach/pkg/sql/types" 23 "github.com/cockroachdb/cockroach/pkg/util/syncutil" 24 ) 25 26 // sqlsmith-go 27 // 28 // sqlsmith-go is a random SQL query generator, based off of sqlsmith: 29 // 30 // https://github.com/anse1/sqlsmith 31 // 32 // You can think of it as walking a randomly generated AST and materializing 33 // that AST as it goes, which it then feeds into Cockroach with the hopes of 34 // finding panics. 35 // 36 // However, naively generating such an AST will only find certain kinds of 37 // panics: they're almost guaranteed not to pass semantic analysis, and so 38 // any components of the system beyond that will probably not be tested. 39 // To get around this, sqlsmith tracks scopes and types, very similar to 40 // how the optbuilder works, to create ASTs which will likely pass 41 // semantic analysis. 42 // 43 // It does this by building the tree top-down. Every level of the tree 44 // requests input of a certain form. For instance, a SELECT will request 45 // a list of projections which respect the scope that the SELECT introduces, 46 // and a function call will request an input value of a particular type, 47 // subject to the same scope it has. This raises a question: what if we 48 // are unable to construct an expression meeting the restrictions requested 49 // by the parent expression? Rather than do some fancy constraint solving 50 // (which could be an interesting direction for this tool to go in the 51 // future, but I've found to be difficult when I've tried in the past) 52 // sqlsmith will simply try randomly to generate an expression, and once 53 // it fails a certain number of times, it will retreat up the tree and 54 // retry at a higher level. 55 56 const retryCount = 20 57 58 // Smither is a sqlsmith generator. 59 type Smither struct { 60 rnd *rand.Rand 61 db *gosql.DB 62 lock syncutil.RWMutex 63 tables []*tableRef 64 columns map[tree.TableName]map[tree.Name]*tree.ColumnTableDef 65 indexes map[tree.TableName]map[tree.Name]*tree.CreateIndex 66 nameCounts map[string]int 67 activeSavepoints []string 68 69 stmtWeights, alterWeights []statementWeight 70 stmtSampler, alterSampler *statementSampler 71 tableExprWeights []tableExprWeight 72 tableExprSampler *tableExprSampler 73 selectStmtWeights []selectStatementWeight 74 selectStmtSampler *selectStatementSampler 75 scalarExprWeights, boolExprWeights []scalarExprWeight 76 scalarExprSampler, boolExprSampler *scalarExprSampler 77 78 disableWith bool 79 disableImpureFns bool 80 disableLimits bool 81 disableWindowFuncs bool 82 simpleDatums bool 83 avoidConsts bool 84 vectorizable bool 85 outputSort bool 86 postgres bool 87 ignoreFNs []*regexp.Regexp 88 complexity float64 89 90 bulkSrv *httptest.Server 91 bulkFiles map[string][]byte 92 bulkBackups map[string]tree.TargetList 93 bulkExports []string 94 } 95 96 type ( 97 statement func(*Smither) (tree.Statement, bool) 98 tableExpr func(s *Smither, refs colRefs, forJoin bool) (tree.TableExpr, colRefs, bool) 99 selectStatement func(s *Smither, desiredTypes []*types.T, refs colRefs, withTables tableRefs) (tree.SelectStatement, colRefs, bool) 100 scalarExpr func(*Smither, Context, *types.T, colRefs) (expr tree.TypedExpr, ok bool) 101 ) 102 103 // NewSmither creates a new Smither. db is used to populate existing tables 104 // for use as column references. It can be nil to skip table population. 105 func NewSmither(db *gosql.DB, rnd *rand.Rand, opts ...SmitherOption) (*Smither, error) { 106 s := &Smither{ 107 rnd: rnd, 108 db: db, 109 nameCounts: map[string]int{}, 110 111 stmtWeights: allStatements, 112 alterWeights: alters, 113 tableExprWeights: allTableExprs, 114 selectStmtWeights: selectStmts, 115 scalarExprWeights: scalars, 116 boolExprWeights: bools, 117 118 complexity: 0.2, 119 } 120 for _, opt := range opts { 121 opt.Apply(s) 122 } 123 s.stmtSampler = newWeightedStatementSampler(s.stmtWeights, rnd.Int63()) 124 s.alterSampler = newWeightedStatementSampler(s.alterWeights, rnd.Int63()) 125 s.tableExprSampler = newWeightedTableExprSampler(s.tableExprWeights, rnd.Int63()) 126 s.selectStmtSampler = newWeightedSelectStatementSampler(s.selectStmtWeights, rnd.Int63()) 127 s.scalarExprSampler = newWeightedScalarExprSampler(s.scalarExprWeights, rnd.Int63()) 128 s.boolExprSampler = newWeightedScalarExprSampler(s.boolExprWeights, rnd.Int63()) 129 s.enableBulkIO() 130 return s, s.ReloadSchemas() 131 } 132 133 // Close closes resources used by the Smither. 134 func (s *Smither) Close() { 135 if s.bulkSrv != nil { 136 s.bulkSrv.Close() 137 } 138 } 139 140 var prettyCfg = func() tree.PrettyCfg { 141 cfg := tree.DefaultPrettyCfg() 142 cfg.LineWidth = 120 143 cfg.Simplify = false 144 return cfg 145 }() 146 147 // Generate returns a random SQL string. 148 func (s *Smither) Generate() string { 149 i := 0 150 for { 151 stmt, ok := s.makeStmt() 152 if !ok { 153 i++ 154 if i > 1000 { 155 panic("exhausted generation attempts") 156 } 157 continue 158 } 159 i = 0 160 return prettyCfg.Pretty(stmt) 161 } 162 } 163 164 // GenerateExpr returns a random SQL expression that does not depend on any 165 // tables or columns. 166 func (s *Smither) GenerateExpr() tree.TypedExpr { 167 return makeScalar(s, s.randScalarType(), nil) 168 } 169 170 func (s *Smither) name(prefix string) tree.Name { 171 s.lock.Lock() 172 s.nameCounts[prefix]++ 173 count := s.nameCounts[prefix] 174 s.lock.Unlock() 175 return tree.Name(fmt.Sprintf("%s_%d", prefix, count)) 176 } 177 178 // SmitherOption is an option for the Smither client. 179 type SmitherOption interface { 180 Apply(*Smither) 181 String() string 182 } 183 184 func simpleOption(name string, apply func(s *Smither)) func() SmitherOption { 185 return func() SmitherOption { 186 return option{ 187 name: name, 188 apply: apply, 189 } 190 } 191 } 192 193 func multiOption(name string, opts ...SmitherOption) func() SmitherOption { 194 var sb strings.Builder 195 sb.WriteString(name) 196 sb.WriteString("(") 197 delim := "" 198 for _, opt := range opts { 199 sb.WriteString(delim) 200 delim = ", " 201 sb.WriteString(opt.String()) 202 } 203 sb.WriteString(")") 204 return func() SmitherOption { 205 return option{ 206 name: sb.String(), 207 apply: func(s *Smither) { 208 for _, opt := range opts { 209 opt.Apply(s) 210 } 211 }, 212 } 213 } 214 } 215 216 type option struct { 217 name string 218 apply func(s *Smither) 219 } 220 221 func (o option) String() string { 222 return o.name 223 } 224 225 func (o option) Apply(s *Smither) { 226 o.apply(s) 227 } 228 229 // DisableMutations causes the Smither to not emit statements that could 230 // mutate any on-disk data. 231 var DisableMutations = simpleOption("disable mutations", func(s *Smither) { 232 s.stmtWeights = nonMutatingStatements 233 s.tableExprWeights = nonMutatingTableExprs 234 }) 235 236 // DisableDDLs causes the Smither to not emit statements that change table 237 // schema (CREATE, DROP, ALTER, etc.) 238 var DisableDDLs = simpleOption("disable DDLs", func(s *Smither) { 239 s.stmtWeights = []statementWeight{ 240 {20, makeSelect}, 241 {5, makeInsert}, 242 {5, makeUpdate}, 243 {1, makeDelete}, 244 // If we don't have any DDL's, allow for use of savepoints and transactions. 245 {2, makeBegin}, 246 {2, makeSavepoint}, 247 {2, makeReleaseSavepoint}, 248 {2, makeRollbackToSavepoint}, 249 {2, makeCommit}, 250 {2, makeRollback}, 251 } 252 }) 253 254 // OnlyNoDropDDLs causes the Smither to only emit DDLs, but won't ever drop 255 // a table. 256 var OnlyNoDropDDLs = simpleOption("only DDLs", func(s *Smither) { 257 s.stmtWeights = append([]statementWeight{ 258 {1, makeBegin}, 259 {2, makeRollback}, 260 {6, makeCommit}, 261 }, 262 altersExistingTable..., 263 ) 264 }) 265 266 // DisableWith causes the Smither to not emit WITH clauses. 267 var DisableWith = simpleOption("disable WITH", func(s *Smither) { 268 s.disableWith = true 269 }) 270 271 // DisableImpureFns causes the Smither to disable impure functions. 272 var DisableImpureFns = simpleOption("disable impure funcs", func(s *Smither) { 273 s.disableImpureFns = true 274 }) 275 276 // DisableCRDBFns causes the Smither to disable crdb_internal functions. 277 func DisableCRDBFns() SmitherOption { 278 return IgnoreFNs("^crdb_internal") 279 } 280 281 // SimpleDatums causes the Smither to emit simpler constant datums. 282 var SimpleDatums = simpleOption("simple datums", func(s *Smither) { 283 s.simpleDatums = true 284 }) 285 286 // IgnoreFNs causes the Smither to ignore functions that match the regex. 287 func IgnoreFNs(regex string) SmitherOption { 288 r := regexp.MustCompile(regex) 289 return option{ 290 name: fmt.Sprintf("ignore fns: %q", r.String()), 291 apply: func(s *Smither) { 292 s.ignoreFNs = append(s.ignoreFNs, r) 293 }, 294 } 295 } 296 297 // DisableLimits causes the Smither to disable LIMIT clauses. 298 var DisableLimits = simpleOption("disable LIMIT", func(s *Smither) { 299 s.disableLimits = true 300 }) 301 302 // AvoidConsts causes the Smither to prefer column references over generating 303 // constants. 304 var AvoidConsts = simpleOption("avoid consts", func(s *Smither) { 305 s.avoidConsts = true 306 }) 307 308 // DisableWindowFuncs disables window functions. 309 var DisableWindowFuncs = simpleOption("disable window funcs", func(s *Smither) { 310 s.disableWindowFuncs = true 311 }) 312 313 // Vectorizable causes the Smither to limit query generation to queries 314 // supported by vectorized execution. 315 var Vectorizable = multiOption( 316 "Vectorizable", 317 DisableMutations(), 318 DisableWith(), 319 DisableWindowFuncs(), 320 AvoidConsts(), 321 // This must be last so it can make the final changes to table 322 // exprs and statements. 323 simpleOption("vectorizable", func(s *Smither) { 324 s.vectorizable = true 325 s.stmtWeights = nonMutatingStatements 326 s.tableExprWeights = vectorizableTableExprs 327 })(), 328 ) 329 330 // OutputSort adds a top-level ORDER BY on all columns. 331 var OutputSort = simpleOption("output sort", func(s *Smither) { 332 s.outputSort = true 333 }) 334 335 // CompareMode causes the Smither to generate statements that have 336 // deterministic output. 337 var CompareMode = multiOption( 338 "compare mode", 339 DisableMutations(), 340 DisableImpureFns(), 341 DisableCRDBFns(), 342 IgnoreFNs("^version"), 343 DisableLimits(), 344 OutputSort(), 345 ) 346 347 // PostgresMode causes the Smither to generate statements that work identically 348 // in Postgres and Cockroach. 349 var PostgresMode = multiOption( 350 "postgres mode", 351 CompareMode(), 352 DisableWith(), 353 SimpleDatums(), 354 IgnoreFNs("^current_"), 355 simpleOption("postgres", func(s *Smither) { 356 s.postgres = true 357 })(), 358 359 // Some func impls differ from postgres, so skip them here. 360 // #41709 361 IgnoreFNs("^sha"), 362 // We use e'XX' instead of E'XX' for hex strings, so ignore these. 363 IgnoreFNs("^quote"), 364 // We have some differences here with empty string and "default"; skip until fixed. 365 IgnoreFNs("^pg_collation_for"), 366 )