// Copyright 2024 Block, Inc.

// Package trx loads and parses Finch transaction (trx) files into
// statements and data generators for a stage.
package trx

import (
	"bufio"
	"fmt"
	"log"
	"os"
	"regexp"
	"strconv"
	"strings"
	"time"

	"github.com/dustin/go-humanize"

	"github.com/square/finch"
	"github.com/square/finch/config"
	"github.com/square/finch/data"
	"github.com/square/finch/limit"
)

// Statement position markers.
// NOTE(review): not referenced anywhere in this file; presumably consumed
// by other packages (e.g. client) — confirm before changing.
const (
	STMT  = byte(0x0)
	BEGIN = byte(0x1)
	END   = byte(0x2)
)

// EXPLICIT_CALL_SUFFIX marks an explicit data generator call: @d() vs. @d.
const EXPLICIT_CALL_SUFFIX = "()"

// DataKeyPattern matches a data key with or without the explicit call
// suffix: @d or @d().
var DataKeyPattern = regexp.MustCompile(`@[\w_-]+(?:\(\))?`)

// ExplicitCallPattern matches only explicit calls: @d().
var ExplicitCallPattern = regexp.MustCompile(`@[\w_-]+\(\)`)

// Set is the complete set of transactions (and statements) for a stage.
type Set struct {
	Order      []string                // trx names in config order
	Statements map[string][]*Statement // keyed on trx name
	Meta       map[string]Meta         // keyed on trx name
	Data       *data.Scope             // keyed on data key (@d)
}

// Statement is one query in a transaction and all its read-only metadata.
type Statement struct {
	Trx          string        // name of the trx this statement belongs to
	Query        string        // final SQL after data keys are replaced with value formats (or "?" when prepared)
	ResultSet    bool          // true for SELECT
	Prepare      bool          // true if --prepare/--prepared modifier given
	PrepareMulti int           // number of copies sharing one prepared statement (set on first copy)
	Begin        bool          // true for BEGIN/START; used to rate limit trx per second
	Commit       bool          // true for COMMIT; used to measure TPS rate
	Write        bool          // true for INSERT/UPDATE/DELETE/REPLACE
	DDL          bool          // true for ALTER/CREATE/DROP/RENAME/TRUNCATE
	Idle         time.Duration // --idle sleep duration
	Inputs       []string      // data keys (number of values)
	Outputs      []string      // data keys save-results|columns and save-insert-id
	InsertId     string        // data key (special output)
	Limit        limit.Data    // --rows / --table-size / --database-size write limit
	Calls        []byte        // per Inputs[i]: 1 if the key used the explicit @d() form, else 0
}

// Meta is per-trx metadata aggregated across its statements.
type Meta struct {
	DDL bool // true if any statement in the trx is DDL
}

// Load loads all trx files and returns a Set representing all parsed trx.
// This is called from stage.Prepare since a stage comprises all trx.
// The given data scope comes from compute.Server to handle globally scoped
// data keys. Params are user-defined from the stage file: stage.params.
// The stage uses the returned Set for workload allocation based on however
// stage.workload mixes and matches trx to exec/client groups.
func Load(trxFiles []config.Trx, scope *data.Scope, params map[string]string) (*Set, error) {
	set := &Set{
		Order:      make([]string, 0, len(trxFiles)),
		Statements: map[string][]*Statement{},
		Data:       scope,
		Meta:       map[string]Meta{},
	}
	// Load files in config order; each File.Load appends into set.
	// Any error aborts the whole stage load.
	for i := range trxFiles {
		if err := NewFile(trxFiles[i], set, params).Load(); err != nil {
			return nil, err
		}
	}
	return set, nil
}

// ErrEOF is returned by File.line when it reads an explicit "-- EOF" marker.
// File.Load treats it as a clean early end of file, not an error.
var ErrEOF = fmt.Errorf("EOF")

// lineBuf buffers lines of the current statement until an empty line
// signals the end of the statement.
type lineBuf struct {
	n      uint     // current line number in the trx file (1-indexed)
	str    string   // accumulated SQL text of the statement so far
	mods   []string // "-- modifier" lines attached to the statement
	copyNo uint     // current copy number while expanding --copies, else 0
}

// File represents and loads one trx file. File.Load is called by the pkg func,
// trx.Load, which is called by stage.Prepare. Do not call File.Load directly
// except for testing.
type File struct {
	cfg    config.Trx        // stage.trx[]
	set    *Set              // trx set for the stage, what File.Load fills in
	params map[string]string // stage.params: user-defined value interpolation
	// --
	lb      lineBuf        // save lines until a complete statement is read
	colRefs map[string]int // column ref counts to detect unused ones
	stmtNo  uint           // 1-indexed in file (not a line number; not an index into stmt)
	stmts   []*Statement   // all statements in this file
	hasDDL  bool           // true if any statement is DDL
}

// NewFile returns a new File ready to load one trx file into set.
func NewFile(cfg config.Trx, set *Set, params map[string]string) *File {
	return &File{
		cfg:     cfg,
		set:     set,
		params:  params,
		colRefs: map[string]int{},
		lb:      lineBuf{mods: []string{}},
		stmts:   []*Statement{},
		stmtNo:  0,
	}
}

// Load reads and parses the trx file (f.cfg.File) line by line, then records
// the parsed statements and metadata in the stage Set. It returns an error
// if the file has no statements or if a saved column is never referenced.
func (f *File) Load() error {
	finch.Debug("loading %s", f.cfg.File)
	file, err := os.Open(f.cfg.File)
	if err != nil {
		return err
	}
	defer file.Close()

	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		err = f.line(strings.TrimSpace(scanner.Text()))
		if err != nil {
			if err == ErrEOF {
				break // "-- EOF" marker: stop reading early, not an error
			}
			return err
		}
	}
	err = f.line("") // last line: flush any buffered final statement
	if err != nil {
		return err
	}

	if len(f.stmts) == 0 {
		return fmt.Errorf("trx file %s has no statements; at least 1 is required", f.cfg.File)
	}

	// Saved columns (save-columns/save-insert-id) register a zero ref count
	// in f.colRefs; referencing @col in a later statement increments it.
	// A still-zero count here means the column was saved but never used.
	noRefs := []string{}
	for col, refs := range f.colRefs {
		if refs > 0 {
			continue
		}
		noRefs = append(noRefs, col)
	}
	if len(noRefs) > 0 {
		return fmt.Errorf("saved columns not referenced: %s", strings.Join(noRefs, ", "))
	}

	if err := scanner.Err(); err != nil {
		log.Fatal(err) // shouldn't happen
	}

	f.set.Order = append(f.set.Order, f.cfg.Name)
	f.set.Statements[f.cfg.Name] = f.stmts
	f.set.Meta[f.cfg.Name] = Meta{
		DDL: f.hasDDL,
	}

	return nil
}

// line consumes one trimmed line of the trx file. Non-empty lines are either
// modifiers ("-- foo") or SQL text appended to the current statement buffer.
// An empty line terminates the buffered statement and triggers parsing via
// f.statements. Returns ErrEOF on the "-- EOF" marker.
func (f *File) line(line string) error {
	f.lb.n++

	// More lines in statement
	if line != "" {
		finch.Debug("line %d: %s\n", f.lb.n, line)
		if strings.HasPrefix(line, "-- ") {
			if line == "-- EOF" {
				return ErrEOF
			}
			// Interpolate stage params ($params.foo) into the modifier text.
			mod, err := config.Vars(strings.TrimSpace(strings.TrimPrefix(line, "--")), f.params, true)
			if err != nil {
				return fmt.Errorf("parsing modifier '%s' on line %d: %s", line, f.lb.n, err)
			}
			f.lb.mods = append(f.lb.mods, mod)
		} else {
			f.lb.str += line + " "
		}
		return nil
	}

	// Empty lines between statements
	if f.lb.str == "" {
		finch.Debug("line %d: space", f.lb.n)
		return nil
	}

	// End of statement
	finch.Debug("line %d: end prev", f.lb.n)
	s, err := f.statements()
	if err != nil {
		return fmt.Errorf("error parsing %s at line %d: %s", f.cfg.File, f.lb.n-1, err)
	}
	for i := range s {
		finch.Debug("stmt: %+v", s[i])
	}
	f.stmts = append(f.stmts, s...)

	// Reset the buffer for the next statement.
	f.lb.str = ""
	f.lb.mods = []string{}

	return nil
}

// NOTE(review): reKeyVal is not referenced in this file — confirm whether
// another file in the package uses it before removing.
var reKeyVal = regexp.MustCompile(`([\w_-]+)(?:\:\s*(\w+))?`)
var reCSV = regexp.MustCompile(`\/\*\!csv\s+(\d+)\s+(.+)\*\/`)
var reFirstWord = regexp.MustCompile(`^(\w+)`)

// statements parses the buffered statement (f.lb.str) and its modifiers
// (f.lb.mods) into one or more Statements. It usually returns a single
// Statement; the --copies N modifier recurses to return N copies, and
// --copies 0 returns (nil, nil) to drop the statement entirely.
func (f *File) statements() ([]*Statement, error) {
	f.stmtNo++
	s := &Statement{
		Trx: f.cfg.Name, // trx name (trx.name or base(trx.file)
	}

	query := strings.TrimSpace(f.lb.str)
	finch.Debug("query raw: %s", query)

	// ----------------------------------------------------------------------
	// Switches: classify the statement by its first (SQL verb) word
	// ----------------------------------------------------------------------

	com := strings.ToUpper(reFirstWord.FindString(query))
	switch com {
	case "SELECT":
		s.ResultSet = true
	case "BEGIN", "START":
		s.Begin = true // used to rate limit trx per second (TPS) in client/client.go
	case "COMMIT":
		s.Commit = true // used to measure TPS rate in client/client.go
	case "INSERT", "UPDATE", "DELETE", "REPLACE":
		s.Write = true
	case "ALTER", "CREATE", "DROP", "RENAME", "TRUNCATE":
		finch.Debug("DDL")
		s.DDL = true // statement is DDL
		f.hasDDL = true // trx has DDL
	}

	// ----------------------------------------------------------------------
	// Modifiers: --prepare, --table-size, etc.
	// ----------------------------------------------------------------------

	for _, mod := range f.lb.mods {
		m := strings.Fields(mod)
		finch.Debug("mod: '%v' %#v", mod, m)
		if len(m) < 1 {
			return nil, fmt.Errorf("invalid modifier: '%s': does not match key: value (pattern match < 2)", mod)
		}
		m[0] = strings.Trim(m[0], ":") // accept "key:" and "key" forms
		switch m[0] {
		case "prepare", "prepared":
			s.Prepare = true
		case "idle":
			// NOTE(review): m[1] is accessed without a len(m) check here and
			// in rows/save-insert-id/copies below; a bare modifier would
			// panic — see the @todo comments.
			d, err := time.ParseDuration(m[1])
			if err != nil {
				return nil, fmt.Errorf("invalid idle modifier: '%s': %s", mod, err)
			}
			s.Idle = d
		case "rows":
			max, err := strconv.ParseUint(m[1], 10, 64)
			if err != nil {
				return nil, fmt.Errorf("invalid rows limit: %s: %s", m[1], err)
			}
			var offset uint64
			if len(m) == 3 {
				offset, err = strconv.ParseUint(m[2], 10, 64)
				if err != nil {
					return nil, fmt.Errorf("invalid rows offset: %s: %s", m[2], err)
				}
			}
			finch.Debug("write limit: %d rows (offset %d)", max, offset)
			s.Limit = limit.Or(s.Limit, limit.NewRows(int64(max), int64(offset)))
		case "table-size", "database-size":
			if len(m) != 3 {
				return nil, fmt.Errorf("invalid %s modifier: split %d fields, expected 3: %s", m[0], len(m), mod)
			}
			max, err := humanize.ParseBytes(m[2])
			if err != nil {
				return nil, err
			}
			var lm limit.Data
			if m[0] == "table-size" {
				lm = limit.NewSize(max, m[2], "", m[1])
			} else { // database-size
				lm = limit.NewSize(max, m[2], m[1], "")
			}
			s.Limit = limit.Or(s.Limit, lm)
		case "save-insert-id":
			// @todo check len(m)
			if s.ResultSet {
				return nil, fmt.Errorf("save-insert-id not allowed on SELECT")
			}
			finch.Debug("save-insert-id")
			dataKey, err := f.column(0, m[1])
			if err != nil {
				return nil, err
			}
			s.InsertId = dataKey
			s.Outputs = append(s.Outputs, dataKey)
		case "save-columns":
			// @todo check len(m)
			for i, col := range m[1:] {
				// @todo split csv (handle "col1,col2" instead of "col1, col2")
				dataKey, err := f.column(i, col)
				if err != nil {
					return nil, err
				}
				s.Outputs = append(s.Outputs, dataKey)
			}
		case "copies":
			n, err := strconv.Atoi(m[1])
			if err != nil {
				return nil, fmt.Errorf("copies: %s invalid: %s", m[1], err)
			}
			if n < 0 {
				return nil, fmt.Errorf("copies: %s invalid: must be >= 0", m[1])
			}
			if n == 0 {
				return nil, nil // drop statement entirely
			}
			if n == 1 {
				continue // 1 copy == the statement itself; nothing to do
			}
			// Re-parse the statement n times without the copies modifier.
			// prepareMulti: if prepared and the query does NOT embed the
			// copy number, all copies are textually identical, so they can
			// share one prepared statement (PrepareMulti set on copy 1).
			prepareMulti := false
			mods := make([]string, 0, len(f.lb.mods)-1)
			for _, mod := range f.lb.mods {
				if strings.HasPrefix(mod, "copies") {
					continue // remove copies mod to prevent infinite recursion
				}
				if strings.HasPrefix(mod, "prepare") && !strings.Contains(query, finch.COPY_NUMBER) {
					prepareMulti = true
				}
				mods = append(mods, mod)
			}
			f.lb.mods = mods
			f.stmtNo-- // recursion re-increments for the first copy
			multi := make([]*Statement, n)
			for i := 0; i < n; i++ {
				finch.Debug("copy %d of %d", i+1, n)
				f.lb.copyNo = uint(i + 1)
				ms, err := f.statements() // recurse
				if err != nil {
					return nil, fmt.Errorf("during copy recurse: %s", err)
				}
				multi[i] = ms[0]
			}
			if prepareMulti {
				multi[0].PrepareMulti = n
			}
			f.lb.copyNo = 0
			return multi, nil
		default:
			return nil, fmt.Errorf("unknown modifier: %s: '%s'", m[0], mod)
		}
	}

	// ----------------------------------------------------------------------
	// Replace /*!copy-number*/
	// ----------------------------------------------------------------------
	query = strings.ReplaceAll(query, finch.COPY_NUMBER, fmt.Sprintf("%d", f.lb.copyNo))

	// ----------------------------------------------------------------------
	// Expand CSV /*!csv N template*/
	// ----------------------------------------------------------------------
	csvTemplate := ""
	m := reCSV.FindStringSubmatch(query)
	if len(m) > 0 {
		n, err := strconv.ParseInt(m[1], 10, 32)
		if err != nil {
			return nil, fmt.Errorf("invalid number of CSV values in %s: %s", m[0], err)
		}
		vals := make([]string, n)
		csvTemplate = strings.TrimSpace(m[2])

		keys := map[string]bool{}
		for _, name := range DataKeyPattern.FindAllString(csvTemplate, -1) {
			// Trim to look up data key in config because @ is not valid YAML.
			// The @ will be put back later because all other code expects it.
			name = cfgKey(name)
			dataCfg, ok := f.cfg.Data[name] // config.stage.trx[].data
			if !ok {
				return nil, fmt.Errorf("%s not configured: trx file uses %s but this data key is not configured in the stage file", name, name)
			}

			// @d in a CSV template defaults to row scope
			if dataCfg.Scope == "" {
				dataCfg.Scope = finch.SCOPE_ROW
				f.cfg.Data[name] = dataCfg
			}

			// Save row scoped @d in CSV template, ignore other scopes
			if dataCfg.Scope == finch.SCOPE_ROW {
				keys["@"+name] = true
			}
		}

		// Change first row scoped @d -> @d() so it generates new values per row
		csvTemplateScoped := RowScope(keys, csvTemplate)
		finch.Debug("csv %d %s -> %s", n, csvTemplate, csvTemplateScoped)

		// Expand template, e.g. 3 (@d) -> (@d), (@d), (@d)
		for i := int64(0); i < n; i++ {
			vals[i] = csvTemplateScoped
		}
		csv := strings.Join(vals, ", ")
		query = reCSV.ReplaceAllLiteralString(query, csv)
	}

	// ----------------------------------------------------------------------
	// Data keys: @d -> data.Generator
	// ----------------------------------------------------------------------
	dataKeys := DataKeyPattern.FindAllString(query, -1)
	finch.Debug("data keys: %v", dataKeys)
	if len(dataKeys) == 0 {
		s.Query = query
		return []*Statement{s}, nil // no data key, return early
	}
	s.Inputs = dataKeys

	// Record which keys used the explicit @d() form, then strip the ()
	// from the query text; only the bare @d form is replaced below.
	s.Calls = Calls(s.Inputs)
	query = ExplicitCallPattern.ReplaceAllStringFunc(query, func(s string) string {
		return strings.TrimSuffix(s, EXPLICIT_CALL_SUFFIX)
	})

	dataFormats := map[string]string{} // keyed on data name
	for i, name := range s.Inputs {
		// Remove () from @d()
		name = strings.TrimSuffix(name, EXPLICIT_CALL_SUFFIX)
		s.Inputs[i] = name

		var g data.Generator
		var err error

		// Three cases: (1) a previously saved column, (2) @PREV referring
		// to the nearest preceding non-@PREV key, (3) a data key: reuse its
		// generator if already created, else create and register it.
		if k, ok := f.set.Data.Keys[name]; ok && k.Column >= 0 {
			f.colRefs[name]++
			g = k.Generator
		} else if name == "@PREV" {
			if i == 0 {
				return nil, fmt.Errorf("no @PREV data generator")
			}
			for p := i - 1; p >= 0; p-- {
				finch.Debug("%s <- %s", dataKeys[p], dataKeys[i])
				if dataKeys[p] == "@PREV" {
					continue
				}
				g = f.set.Data.Keys[dataKeys[p]].Generator
				break
			}
		} else {
			if k, ok = f.set.Data.Keys[name]; ok {
				g = k.Generator
			} else {
				dataCfg, ok := f.cfg.Data[cfgKey(name)] // config.stage.trx[].data
				if !ok {
					return nil, fmt.Errorf("%s not configured: trx file uses %s but this data key is not configured in the stage file", name, name)
				}
				finch.Debug("make data generator: %s %s scope: %s", dataCfg.Generator, name, dataCfg.Scope)

				if dataCfg.Scope == "" {
					dataCfg.Scope = finch.SCOPE_STATEMENT
					// NOTE(review): write-back key here is name (with "@"),
					// but the lookup above used cfgKey(name) (without "@")
					// — possible bug; compare the CSV section, which writes
					// back under the trimmed key. Confirm intent.
					f.cfg.Data[name] = dataCfg
				}

				g, err = data.Make(
					dataCfg.Generator, // e.g. "auto-inc"
					name,              // @d
					dataCfg.Params,    // trx[].data.params, generator-specific
				)
				if err != nil {
					return nil, err
				}
				f.set.Data.Keys[name] = data.Key{
					Name:      name,
					Trx:       f.cfg.Name,
					Line:      f.lb.n - 1,
					Statement: f.stmtNo,
					Column:    -1,
					Scope:     dataCfg.Scope,
					Generator: g,
				}
				// NOTE(review): k is the zero-value Key in this branch (the
				// lookup above failed); likely meant to print the Key just
				// stored in f.set.Data.Keys[name] — confirm.
				finch.Debug("%#v", k)
			}
		}

		// Prepared statements use the "?" placeholder; otherwise use the
		// generator's own value format.
		if s.Prepare {
			dataFormats[name] = "?"
		} else {
			_, dataFormats[name] = g.Format()
		}
	}

	// Build old->new pairs for strings.NewReplacer: @d -> its format.
	replacements := make([]string, len(dataFormats)*2) // *2 because key + value
	i := 0
	for k, v := range dataFormats {
		replacements[i] = k
		replacements[i+1] = v
		i += 2
	}
	finch.Debug("replacements: %v", replacements)
	r := strings.NewReplacer(replacements...)
	s.Query = r.Replace(query)

	// Caller debug prints full Statement
	return []*Statement{s}, nil
}

// column registers a saved column (from save-columns or save-insert-id) as a
// data key with a "column" generator, and returns the data key name. colNo is
// the column's position in the result set. The special no-op column "_"
// (finch.NOOP_COLUMN) registers a shared no-op generator and is exempt from
// duplicate and unused-reference checks.
func (f *File) column(colNo int, col string) (string, error) {
	col = strings.TrimSpace(strings.TrimSuffix(col, ","))
	finch.Debug("col %s %d", col, colNo)

	// If no-op column "_"?
	if col == finch.NOOP_COLUMN {
		if _, ok := f.set.Data.Keys[finch.NOOP_COLUMN]; !ok {
			f.set.Data.Keys[finch.NOOP_COLUMN] = data.Key{
				Name:      finch.NOOP_COLUMN,
				Trx:       f.cfg.Name,
				Line:      f.lb.n - 1,
				Statement: f.stmtNo,
				Column:    colNo,
				Scope:     finch.SCOPE_GLOBAL,
				Generator: data.Noop,
			}
			finch.Debug("%#v", f.set.Data.Keys[finch.NOOP_COLUMN])
		}
		finch.Debug("saved no-op col %s @ %d", col, colNo)
		return finch.NOOP_COLUMN, nil
	}

	// A saved column may be registered only once per stage.
	if k, ok := f.set.Data.Keys[col]; ok {
		return "", fmt.Errorf("duplicated saved column: %s (first use: %s)", col, k)
	}

	dataCfg, ok := f.cfg.Data[cfgKey(col)] // config.stage.trx.*.data
	if !ok {
		// No explicit config for this column: default to a trx-scoped,
		// non-quoted column value and tell the user.
		dataCfg = config.Data{
			Name:      col,
			Generator: "column",
			Scope:     finch.SCOPE_TRX,
		}
		fmt.Printf("No data params for column %s (%s line %d), default to non-quoted value\n", col, f.cfg.Name, f.lb.n-1)
	}

	g, err := data.Make("column", col, dataCfg.Params)
	if err != nil {
		return "", err
	}
	f.colRefs[col] = 0 // zero refs until some later statement uses @col
	f.set.Data.Keys[col] = data.Key{
		Name:      col,
		Trx:       f.cfg.Name,
		Line:      f.lb.n - 1,
		Statement: f.stmtNo,
		Column:    colNo,
		Scope:     dataCfg.Scope,
		Generator: g,
	}
	finch.Debug("%#v", f.set.Data.Keys[col])
	return col, nil
}

// Calls returns one byte per data key: 1 if the key uses the explicit call
// suffix (@d()), else 0 (@d).
func Calls(dataKeys []string) []byte {
	calls := make([]byte, len(dataKeys))
	for i, name := range dataKeys {
		if strings.HasSuffix(name, EXPLICIT_CALL_SUFFIX) {
			calls[i] = 1
		}
	}
	finch.Debug("calls: %v", calls)
	return calls
}

// RowScope changes every first occurrence of the keys from @d to @d()
// in csvTemplate. So "(@d, @d)" -> "(@d(), @d)". The explicit call @d()
// makes @d row scoped because each row will call @d again. This is called
// when the /*!csv N template */ is being processed (see reCSV).
588 func RowScope(keys map[string]bool, csvTemplate string) string { 589 csvDataKeys := DataKeyPattern.FindAllString(csvTemplate, -1) 590 KEY: 591 for dataKey := range keys { // row scoped keys 592 for _, k := range csvDataKeys { // all keys in csvTemplate 593 if !strings.HasPrefix(k, dataKey) { 594 continue // not the row scoped key we're looking for 595 } 596 // This is first occurrence of row scoped key in csvTemplate. 597 // Add () suffix if not already set. 598 if !strings.HasSuffix(k, EXPLICIT_CALL_SUFFIX) { 599 csvTemplate = strings.Replace(csvTemplate, k, k+EXPLICIT_CALL_SUFFIX, 1) // 1=only first occurrence 600 } 601 continue KEY // only check/change first occurrence, so this row scoped key is done 602 } 603 } 604 return csvTemplate 605 } 606 607 func cfgKey(s string) string { 608 return strings.Trim(s, "@"+EXPLICIT_CALL_SUFFIX) 609 }