github.com/cockroachdb/pebble@v0.0.0-20231214172447-ab4952c5f87b/metamorphic/parser.go

// Copyright 2019 The LevelDB-Go and Pebble Authors. All rights reserved. Use
// of this source code is governed by a BSD-style license that can be found in
// the LICENSE file.

package metamorphic

import (
	"fmt"
	"go/scanner"
	"go/token"
	"reflect"
	"strconv"
	"strings"

	"github.com/cockroachdb/errors"
	"github.com/cockroachdb/pebble"
)

type methodInfo struct {
	constructor func() op
	validTags   uint32
}

func makeMethod(i interface{}, tags ...objTag) *methodInfo {
	var validTags uint32
	for _, tag := range tags {
		validTags |= 1 << tag
	}

	t := reflect.TypeOf(i)
	return &methodInfo{
		constructor: func() op {
			return reflect.New(t).Interface().(op)
		},
		validTags: validTags,
	}
}

// opArgs returns the receiverID, targetID and arguments for the op. The
// receiverID is the ID of the object the op will be applied to. The targetID
// is the ID of the object for assignment. If the method does not return a new
// object, then targetID will be nil. The argument list is just what it sounds
// like: the list of arguments for the operation.
func opArgs(op op) (receiverID *objID, targetID *objID, args []interface{}) {
	switch t := op.(type) {
	case *applyOp:
		return &t.writerID, nil, []interface{}{&t.batchID}
	case *checkpointOp:
		return &t.dbID, nil, []interface{}{&t.spans}
	case *closeOp:
		return &t.objID, nil, nil
	case *compactOp:
		return &t.dbID, nil, []interface{}{&t.start, &t.end, &t.parallelize}
	case *batchCommitOp:
		return &t.batchID, nil, nil
	case *dbRatchetFormatMajorVersionOp:
		return &t.dbID, nil, []interface{}{&t.vers}
	case *dbRestartOp:
		return &t.dbID, nil, nil
	case *deleteOp:
		return &t.writerID, nil, []interface{}{&t.key}
	case *deleteRangeOp:
		return &t.writerID, nil, []interface{}{&t.start, &t.end}
	case *iterFirstOp:
		return &t.iterID, nil, nil
	case *flushOp:
		return &t.db, nil, nil
	case *getOp:
		return &t.readerID, nil, []interface{}{&t.key}
	case *ingestOp:
		return &t.dbID, nil, []interface{}{&t.batchIDs}
	case *ingestAndExciseOp:
		return &t.dbID, nil, []interface{}{&t.batchID, &t.exciseStart, &t.exciseEnd}
	case *initOp:
		return nil, nil, []interface{}{&t.dbSlots, &t.batchSlots, &t.iterSlots, &t.snapshotSlots}
	case *iterLastOp:
		return &t.iterID, nil, nil
	case *mergeOp:
		return &t.writerID, nil, []interface{}{&t.key, &t.value}
	case *newBatchOp:
		return &t.dbID, &t.batchID, nil
	case *newIndexedBatchOp:
		return &t.dbID, &t.batchID, nil
	case *newIterOp:
		return &t.readerID, &t.iterID, []interface{}{&t.lower, &t.upper, &t.keyTypes, &t.filterMin, &t.filterMax, &t.useL6Filters, &t.maskSuffix}
	case *newIterUsingCloneOp:
		return &t.existingIterID, &t.iterID, []interface{}{&t.refreshBatch, &t.lower, &t.upper, &t.keyTypes, &t.filterMin, &t.filterMax, &t.useL6Filters, &t.maskSuffix}
	case *newSnapshotOp:
		return &t.dbID, &t.snapID, []interface{}{&t.bounds}
	case *iterNextOp:
		return &t.iterID, nil, []interface{}{&t.limit}
	case *iterNextPrefixOp:
		return &t.iterID, nil, nil
	case *iterCanSingleDelOp:
		return &t.iterID, nil, []interface{}{}
	case *iterPrevOp:
		return &t.iterID, nil, []interface{}{&t.limit}
	case *iterSeekLTOp:
		return &t.iterID, nil, []interface{}{&t.key, &t.limit}
	case *iterSeekGEOp:
		return &t.iterID, nil, []interface{}{&t.key, &t.limit}
	case *iterSeekPrefixGEOp:
		return &t.iterID, nil, []interface{}{&t.key}
	case *setOp:
		return &t.writerID, nil, []interface{}{&t.key, &t.value}
	case *iterSetBoundsOp:
		return &t.iterID, nil, []interface{}{&t.lower, &t.upper}
	case *iterSetOptionsOp:
		return &t.iterID, nil, []interface{}{&t.lower, &t.upper, &t.keyTypes, &t.filterMin, &t.filterMax, &t.useL6Filters, &t.maskSuffix}
	case *singleDeleteOp:
		return &t.writerID, nil, []interface{}{&t.key, &t.maybeReplaceDelete}
	case *rangeKeyDeleteOp:
		return &t.writerID, nil, []interface{}{&t.start, &t.end}
	case *rangeKeySetOp:
		return &t.writerID, nil, []interface{}{&t.start, &t.end, &t.suffix, &t.value}
	case *rangeKeyUnsetOp:
		return &t.writerID, nil, []interface{}{&t.start, &t.end, &t.suffix}
	case *replicateOp:
		return &t.source, nil, []interface{}{&t.dest, &t.start, &t.end}
	}
	panic(fmt.Sprintf("unsupported op type: %T", op))
}

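// methods maps the textual name of a method, as it appears in a parsed trace,
// to the constructor for its op and the bitset of object tags (db, batch,
// iter, snap) on which the method may legally be invoked.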
var methods = map[string]*methodInfo{
	"Apply": makeMethod(applyOp{}, dbTag, batchTag),
	"Checkpoint": makeMethod(checkpointOp{}, dbTag),
	"Clone": makeMethod(newIterUsingCloneOp{}, iterTag),
	"Close": makeMethod(closeOp{}, dbTag, batchTag, iterTag, snapTag),
	"Commit": makeMethod(batchCommitOp{}, batchTag),
	"Compact": makeMethod(compactOp{}, dbTag),
	"Delete": makeMethod(deleteOp{}, dbTag, batchTag),
	"DeleteRange": makeMethod(deleteRangeOp{}, dbTag, batchTag),
	"First": makeMethod(iterFirstOp{}, iterTag),
	"Flush": makeMethod(flushOp{}, dbTag),
	"Get": makeMethod(getOp{}, dbTag, batchTag, snapTag),
	"Ingest": makeMethod(ingestOp{}, dbTag),
	"IngestAndExcise": makeMethod(ingestAndExciseOp{}, dbTag),
	"Init": makeMethod(initOp{}, dbTag),
	"Last": makeMethod(iterLastOp{}, iterTag),
	"Merge": makeMethod(mergeOp{}, dbTag, batchTag),
	"NewBatch": makeMethod(newBatchOp{}, dbTag),
	"NewIndexedBatch": makeMethod(newIndexedBatchOp{}, dbTag),
	"NewIter": makeMethod(newIterOp{}, dbTag, batchTag, snapTag),
	"NewSnapshot": makeMethod(newSnapshotOp{}, dbTag),
	"Next": makeMethod(iterNextOp{}, iterTag),
	"NextPrefix": makeMethod(iterNextPrefixOp{}, iterTag),
	"InternalNext": makeMethod(iterCanSingleDelOp{}, iterTag),
	"Prev": makeMethod(iterPrevOp{}, iterTag),
	"RangeKeyDelete": makeMethod(rangeKeyDeleteOp{}, dbTag, batchTag),
	"RangeKeySet": makeMethod(rangeKeySetOp{}, dbTag, batchTag),
	"RangeKeyUnset": makeMethod(rangeKeyUnsetOp{}, dbTag, batchTag),
	"RatchetFormatMajorVersion": makeMethod(dbRatchetFormatMajorVersionOp{}, dbTag),
	"Replicate": makeMethod(replicateOp{}, dbTag),
	"Restart": makeMethod(dbRestartOp{}, dbTag),
	"SeekGE": makeMethod(iterSeekGEOp{}, iterTag),
	"SeekLT": makeMethod(iterSeekLTOp{}, iterTag),
	"SeekPrefixGE": makeMethod(iterSeekPrefixGEOp{}, iterTag),
	"Set": makeMethod(setOp{}, dbTag, batchTag),
	"SetBounds": makeMethod(iterSetBoundsOp{}, iterTag),
	"SetOptions": makeMethod(iterSetOptionsOp{}, iterTag),
	"SingleDelete": makeMethod(singleDeleteOp{}, dbTag, batchTag),
}

type parser struct {
	opts parserOpts
	fset *token.FileSet
	s    scanner.Scanner
	objs map[objID]bool
}

type parserOpts struct {
	allowUndefinedObjs bool
}

func parse(src []byte, opts parserOpts) (_ []op, err error) {
	// Various bits of magic incantation to set up a scanner for Go-compatible
	// syntax. We arranged for the textual format of ops (e.g. op.String()) to
	// look like Go, which allows us to use the Go scanner for parsing.
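	//
	// For illustration (the object names here are arbitrary), a trace is a
	// sequence of statements of the form
	// `[<target> = ]<receiver>.<Method>(<args>)`, one per line, e.g.:
	//
	//	batch1 = db1.NewBatch()
	//	batch1.Set("foo", "bar")
	//	db1.Apply(batch1)
	//	batch1.Close()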
	p := &parser{
		opts: opts,
		fset: token.NewFileSet(),
		objs: map[objID]bool{makeObjID(dbTag, 1): true, makeObjID(dbTag, 2): true},
	}
	file := p.fset.AddFile("", -1, len(src))
	p.s.Init(file, src, nil /* no error handler */, 0)
	return p.parse()
}

func (p *parser) parse() (_ []op, err error) {
	defer func() {
		if r := recover(); r != nil {
			var ok bool
			if err, ok = r.(error); ok {
				return
			}
			err = errors.Errorf("%v", r)
		}
	}()

	var ops []op
	for {
		op := p.parseOp()
		if op == nil {
			computeDerivedFields(ops)
			return ops, nil
		}
		ops = append(ops, op)
	}
}

func (p *parser) parseOp() op {
	destPos, destTok, destLit := p.s.Scan()
	if destTok == token.EOF {
		return nil
	}
	if destTok != token.IDENT {
		panic(p.errorf(destPos, "unexpected token: %s %q", destTok, destLit))
	}
	if destLit == "Init" {
		// <op>(<args>)
		return p.makeOp(destLit, makeObjID(dbTag, 1), 0, destPos)
	}

	destID := p.parseObjID(destPos, destLit)

	pos, tok, lit := p.s.Scan()
	switch tok {
	case token.PERIOD:
		// <obj>.<op>(<args>)
		if !p.objs[destID] {
			if p.opts.allowUndefinedObjs {
				p.objs[destID] = true
			} else {
				panic(p.errorf(destPos, "unknown object: %s", destID))
			}
		}
		_, methodLit := p.scanToken(token.IDENT)
		return p.makeOp(methodLit, destID, 0, destPos)

	case token.ASSIGN:
		// <obj> = <obj>.<op>(<args>)
		srcPos, srcLit := p.scanToken(token.IDENT)
		srcID := p.parseObjID(srcPos, srcLit)
		if !p.objs[srcID] {
			if p.opts.allowUndefinedObjs {
				p.objs[srcID] = true
			} else {
				panic(p.errorf(srcPos, "unknown object %q", srcLit))
			}
		}
		p.scanToken(token.PERIOD)
		_, methodLit := p.scanToken(token.IDENT)
		p.objs[destID] = true
		return p.makeOp(methodLit, srcID, destID, srcPos)
	}
	panic(p.errorf(pos, "unexpected token: %q", p.tokenf(tok, lit)))
}

func parseObjID(str string) (objID, error) {
	var tag objTag
	switch {
	case strings.HasPrefix(str, "db"):
		tag, str = dbTag, str[2:]
		if str == "" {
			str = "1"
		}
	case strings.HasPrefix(str, "batch"):
		tag, str = batchTag, str[5:]
	case strings.HasPrefix(str, "iter"):
		tag, str = iterTag, str[4:]
	case strings.HasPrefix(str, "snap"):
		tag, str = snapTag, str[4:]
	default:
		return 0, errors.Newf("unable to parse objectID: %q", str)
	}
	id, err := strconv.ParseInt(str, 10, 32)
	if err != nil {
		return 0, err
	}
	return makeObjID(tag, uint32(id)), nil
}

func (p *parser) parseObjID(pos token.Pos, str string) objID {
	id, err := parseObjID(str)
	if err != nil {
		panic(p.errorf(pos, "%s", err))
	}
	return id
}

func unquoteBytes(lit string) []byte {
	s, err := strconv.Unquote(lit)
	if err != nil {
		panic(err)
	}
	if len(s) == 0 {
		return nil
	}
	return []byte(s)
}

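// parseArgs parses the parenthesized, comma-separated argument list for op,
// writing each parsed value through the corresponding pointer in args (as
// returned by opArgs). It also consumes the closing RPAREN and the
// statement's terminating SEMICOLON.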
func (p *parser) parseArgs(op op, methodName string, args []interface{}) {
	pos, _ := p.scanToken(token.LPAREN)
	for i := range args {
		if i > 0 {
			pos, _ = p.scanToken(token.COMMA)
		}

		switch t := args[i].(type) {
		case *uint32:
			_, lit := p.scanToken(token.INT)
			val, err := strconv.ParseUint(lit, 10, 32)
			if err != nil {
				panic(err)
			}
			*t = uint32(val)

		case *uint64:
			_, lit := p.scanToken(token.INT)
			val, err := strconv.ParseUint(lit, 10, 64)
			if err != nil {
				panic(err)
			}
			*t = uint64(val)

		case *[]byte:
			_, lit := p.scanToken(token.STRING)
			*t = unquoteBytes(lit)

		case *bool:
			_, lit := p.scanToken(token.IDENT)
			b, err := strconv.ParseBool(lit)
			if err != nil {
				panic(err)
			}
			*t = b

		case *objID:
			pos, lit := p.scanToken(token.IDENT)
			*t = p.parseObjID(pos, lit)

		case *[]pebble.KeyRange:
			var pending pebble.KeyRange
			for {
				pos, tok, lit := p.s.Scan()
				switch tok {
				case token.STRING:
					x := unquoteBytes(lit)
					if pending.Start == nil {
						pending.Start = x
					} else {
						pending.End = x
						*t = append(*t, pending)
						pending = pebble.KeyRange{}
					}
					pos, tok, lit := p.s.Scan()
					switch tok {
					case token.COMMA:
						continue
					case token.RPAREN:
						p.scanToken(token.SEMICOLON)
						return
					default:
						panic(p.errorf(pos, "unexpected token: %q", p.tokenf(tok, lit)))
					}
				case token.RPAREN:
					p.scanToken(token.SEMICOLON)
					return
				default:
					panic(p.errorf(pos, "unexpected token: %q", p.tokenf(tok, lit)))
				}
			}

		case *[]objID:
			for {
				pos, tok, lit := p.s.Scan()
				switch tok {
				case token.IDENT:
					*t = append(*t, p.parseObjID(pos, lit))
					pos, tok, lit := p.s.Scan()
					switch tok {
					case token.COMMA:
						continue
					case token.RPAREN:
						p.scanToken(token.SEMICOLON)
						return
					default:
						panic(p.errorf(pos, "unexpected token: %q", p.tokenf(tok, lit)))
					}
				case token.RPAREN:
					p.scanToken(token.SEMICOLON)
					return
				default:
					panic(p.errorf(pos, "unexpected token: %q", p.tokenf(tok, lit)))
				}
			}

		case *[]pebble.CheckpointSpan:
			pos, tok, lit := p.s.Scan()
			switch tok {
			case token.RPAREN:
				// No spans.
				*t = nil
				p.scanToken(token.SEMICOLON)
				return

			case token.STRING:
				var keys [][]byte
				for {
					s, err := strconv.Unquote(lit)
					if err != nil {
						panic(p.errorf(pos, "unquoting %q: %v", lit, err))
					}
					keys = append(keys, []byte(s))

					pos, tok, lit = p.s.Scan()
					switch tok {
					case token.COMMA:
						pos, tok, lit = p.s.Scan()
						if tok != token.STRING {
							panic(p.errorf(pos, "unexpected token: %q", p.tokenf(tok, lit)))
						}
						continue

					case token.RPAREN:
						p.scanToken(token.SEMICOLON)
						if len(keys)%2 == 1 {
							panic(p.errorf(pos, "expected even number of keys"))
						}
						*t = make([]pebble.CheckpointSpan, len(keys)/2)
						for i := range *t {
							(*t)[i] = pebble.CheckpointSpan{
								Start: keys[i*2],
								End:   keys[i*2+1],
							}
						}
						return

					default:
						panic(p.errorf(pos, "unexpected token: %q", p.tokenf(tok, lit)))
					}
				}

			default:
				panic(p.errorf(pos, "unexpected token: %q", p.tokenf(tok, lit)))
			}

		case *pebble.FormatMajorVersion:
			_, lit := p.scanToken(token.INT)
			val, err := strconv.ParseUint(lit, 10, 64)
			if err != nil {
				panic(err)
			}
			*t = pebble.FormatMajorVersion(val)

		default:
			panic(p.errorf(pos, "%s: unsupported arg[%d] type: %T", methodName, i, args[i]))
		}
	}
	p.scanToken(token.RPAREN)
	p.scanToken(token.SEMICOLON)
}

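// scanToken scans the next token and panics with a positioned error if it is
// not the expected token. It returns the token's position and literal.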
func (p *parser) scanToken(expected token.Token) (pos token.Pos, lit string) {
	pos, tok, lit := p.s.Scan()
	if tok != expected {
		panic(p.errorf(pos, "unexpected token: %q", p.tokenf(tok, lit)))
	}
	return pos, lit
}

func (p *parser) makeOp(methodName string, receiverID, targetID objID, pos token.Pos) op {
	info := methods[methodName]
	if info == nil {
		panic(p.errorf(pos, "unknown op %s.%s", receiverID, methodName))
	}
	if info.validTags&(1<<receiverID.tag()) == 0 {
		panic(p.errorf(pos, "%s.%s: %s is not a method on %s",
			receiverID, methodName, methodName, receiverID))
	}

	op := info.constructor()
	receiver, target, args := opArgs(op)

	// The form of an operation is:
	//   [target =] receiver.method(args)
	//
	// The receiver is the object the operation will be called on, which can be
	// any valid ID. Certain operations such as Ingest are only valid on the DB
	// object. That is indicated by opArgs returning a nil receiver.
	if receiver != nil {
		*receiver = receiverID
	} else if receiverID.tag() != dbTag {
		panic(p.errorf(pos, "unknown op %s.%s", receiverID, methodName))
	}

	// The target is the object that will be assigned the result of an object
	// creation operation such as newBatchOp or newIterOp.
	if target != nil {
		// It is invalid to not have a targetID for a method which generates a new
		// object.
		if targetID == 0 {
			panic(p.errorf(pos, "assignment expected for %s.%s", receiverID, methodName))
		}
		// It is invalid to try to assign to the DB object.
		if targetID.tag() == dbTag {
			panic(p.errorf(pos, "cannot use %s as target of assignment", targetID))
		}
		*target = targetID
	} else if targetID != 0 {
		panic(p.errorf(pos, "cannot use %s.%s in assignment", receiverID, methodName))
	}

	p.parseArgs(op, methodName, args)
	return op
}

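// tokenf formats a token for use in error messages: the literal text for
// literal tokens (identifiers, numbers, strings), and the token's name
// otherwise.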
func (p *parser) tokenf(tok token.Token, lit string) string {
	if tok.IsLiteral() {
		return lit
	}
	return tok.String()
}

func (p *parser) errorf(pos token.Pos, format string, args ...interface{}) error {
	return errors.New(p.fset.Position(pos).String() + ": " + fmt.Sprintf(format, args...))
}

// computeDerivedFields makes one pass through the provided operations, filling
// any derived fields. This pass must happen before execution because concurrent
// execution depends on these fields.
func computeDerivedFields(ops []op) {
	iterToReader := make(map[objID]objID)
	objToDB := make(map[objID]objID)
	for i := range ops {
		switch v := ops[i].(type) {
		case *newSnapshotOp:
			objToDB[v.snapID] = v.dbID
		case *newIterOp:
			iterToReader[v.iterID] = v.readerID
			dbReaderID := v.readerID
			if dbReaderID.tag() != dbTag {
				dbReaderID = objToDB[dbReaderID]
			}
			objToDB[v.iterID] = dbReaderID
			v.derivedDBID = dbReaderID
		case *newIterUsingCloneOp:
			v.derivedReaderID = iterToReader[v.existingIterID]
			iterToReader[v.iterID] = v.derivedReaderID
			objToDB[v.iterID] = objToDB[v.existingIterID]
		case *iterSetOptionsOp:
			v.derivedReaderID = iterToReader[v.iterID]
		case *iterFirstOp:
			v.derivedReaderID = iterToReader[v.iterID]
		case *iterLastOp:
			v.derivedReaderID = iterToReader[v.iterID]
		case *iterSeekGEOp:
			v.derivedReaderID = iterToReader[v.iterID]
		case *iterSeekPrefixGEOp:
			v.derivedReaderID = iterToReader[v.iterID]
		case *iterSeekLTOp:
			v.derivedReaderID = iterToReader[v.iterID]
		case *iterNextOp:
			v.derivedReaderID = iterToReader[v.iterID]
		case *iterNextPrefixOp:
			v.derivedReaderID = iterToReader[v.iterID]
		case *iterCanSingleDelOp:
			v.derivedReaderID = iterToReader[v.iterID]
		case *iterPrevOp:
			v.derivedReaderID = iterToReader[v.iterID]
		case *newBatchOp:
			objToDB[v.batchID] = v.dbID
		case *newIndexedBatchOp:
			objToDB[v.batchID] = v.dbID
		case *applyOp:
			if derivedDBID, ok := objToDB[v.batchID]; ok && v.writerID.tag() != dbTag {
				objToDB[v.writerID] = derivedDBID
			}
		case *getOp:
			if derivedDBID, ok := objToDB[v.readerID]; ok {
				v.derivedDBID = derivedDBID
			}
		case *batchCommitOp:
			v.dbID = objToDB[v.batchID]
		case *closeOp:
			if derivedDBID, ok := objToDB[v.objID]; ok && v.objID.tag() != dbTag {
				v.derivedDBID = derivedDBID
			}
		case *ingestOp:
			v.derivedDBIDs = make([]objID, len(v.batchIDs))
			for i := range v.batchIDs {
				v.derivedDBIDs[i] = objToDB[v.batchIDs[i]]
			}
		case *ingestAndExciseOp:
			v.derivedDBID = objToDB[v.batchID]
		case *deleteOp:
			derivedDBID := v.writerID
			if v.writerID.tag() != dbTag {
				derivedDBID = objToDB[v.writerID]
			}
			v.derivedDBID = derivedDBID
		}
	}
}
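
// exampleParseRoundTrip is an illustrative sketch of how parse is driven. It
// assumes the op definitions from ops.go in this package; the trace below is
// made up, and the round-trip via String relies on the textual op format
// described in the comment in parse above.
func exampleParseRoundTrip() {
	src := []byte(`
batch1 = db1.NewBatch()
batch1.Set("foo", "bar")
db1.Apply(batch1)
batch1.Close()
`)
	ops, err := parse(src, parserOpts{})
	if err != nil {
		panic(err)
	}
	for _, o := range ops {
		// Each op formats back to text via its String method.
		fmt.Println(o)
	}
}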