github.com/cockroachdb/pebble@v1.1.1-0.20240513155919-3622ade60459/metamorphic/parser.go (about) 1 // Copyright 2019 The LevelDB-Go and Pebble Authors. All rights reserved. Use 2 // of this source code is governed by a BSD-style license that can be found in 3 // the LICENSE file. 4 5 package metamorphic 6 7 import ( 8 "fmt" 9 "go/scanner" 10 "go/token" 11 "reflect" 12 "strconv" 13 "strings" 14 15 "github.com/cockroachdb/errors" 16 "github.com/cockroachdb/pebble" 17 ) 18 19 type methodInfo struct { 20 constructor func() op 21 validTags uint32 22 } 23 24 func makeMethod(i interface{}, tags ...objTag) *methodInfo { 25 var validTags uint32 26 for _, tag := range tags { 27 validTags |= 1 << tag 28 } 29 30 t := reflect.TypeOf(i) 31 return &methodInfo{ 32 constructor: func() op { 33 return reflect.New(t).Interface().(op) 34 }, 35 validTags: validTags, 36 } 37 } 38 39 // args returns the receiverID, targetID and arguments for the op. The 40 // receiverID is the ID of the object the op will be applied to. The targetID 41 // is the ID of the object for assignment. If the method does not return a new 42 // object, then targetID will be nil. The argument list is just what it sounds 43 // like: the list of arguments for the operation. 44 func opArgs(op op) (receiverID *objID, targetID *objID, args []interface{}) { 45 switch t := op.(type) { 46 case *applyOp: 47 return &t.writerID, nil, []interface{}{&t.batchID} 48 case *checkpointOp: 49 return nil, nil, []interface{}{&t.spans} 50 case *closeOp: 51 return &t.objID, nil, nil 52 case *compactOp: 53 return nil, nil, []interface{}{&t.start, &t.end, &t.parallelize} 54 case *batchCommitOp: 55 return &t.batchID, nil, nil 56 case *dbRatchetFormatMajorVersionOp: 57 return nil, nil, []interface{}{&t.vers} 58 case *dbRestartOp: 59 return nil, nil, nil 60 case *deleteOp: 61 return &t.writerID, nil, []interface{}{&t.key} 62 case *deleteRangeOp: 63 return &t.writerID, nil, []interface{}{&t.start, &t.end} 64 case *iterFirstOp: 65 return &t.iterID, nil, nil 66 case *flushOp: 67 return nil, nil, nil 68 case *getOp: 69 return &t.readerID, nil, []interface{}{&t.key} 70 case *ingestOp: 71 return nil, nil, []interface{}{&t.batchIDs} 72 case *initOp: 73 return nil, nil, []interface{}{&t.batchSlots, &t.iterSlots, &t.snapshotSlots} 74 case *iterLastOp: 75 return &t.iterID, nil, nil 76 case *mergeOp: 77 return &t.writerID, nil, []interface{}{&t.key, &t.value} 78 case *newBatchOp: 79 return nil, &t.batchID, nil 80 case *newIndexedBatchOp: 81 return nil, &t.batchID, nil 82 case *newIterOp: 83 return &t.readerID, &t.iterID, []interface{}{&t.lower, &t.upper, &t.keyTypes, &t.filterMin, &t.filterMax, &t.useL6Filters, &t.maskSuffix} 84 case *newIterUsingCloneOp: 85 return &t.existingIterID, &t.iterID, []interface{}{&t.refreshBatch, &t.lower, &t.upper, &t.keyTypes, &t.filterMin, &t.filterMax, &t.useL6Filters, &t.maskSuffix} 86 case *newSnapshotOp: 87 return nil, &t.snapID, []interface{}{&t.bounds} 88 case *iterNextOp: 89 return &t.iterID, nil, []interface{}{&t.limit} 90 case *iterNextPrefixOp: 91 return &t.iterID, nil, nil 92 case *iterCanSingleDelOp: 93 return &t.iterID, nil, []interface{}{} 94 case *iterPrevOp: 95 return &t.iterID, nil, []interface{}{&t.limit} 96 case *iterSeekLTOp: 97 return &t.iterID, nil, []interface{}{&t.key, &t.limit} 98 case *iterSeekGEOp: 99 return &t.iterID, nil, []interface{}{&t.key, &t.limit} 100 case *iterSeekPrefixGEOp: 101 return &t.iterID, nil, []interface{}{&t.key} 102 case *setOp: 103 return &t.writerID, nil, []interface{}{&t.key, &t.value} 104 case *iterSetBoundsOp: 105 return &t.iterID, nil, []interface{}{&t.lower, &t.upper} 106 case *iterSetOptionsOp: 107 return &t.iterID, nil, []interface{}{&t.lower, &t.upper, &t.keyTypes, &t.filterMin, &t.filterMax, &t.useL6Filters, &t.maskSuffix} 108 case *singleDeleteOp: 109 return &t.writerID, nil, []interface{}{&t.key, &t.maybeReplaceDelete} 110 case *rangeKeyDeleteOp: 111 return &t.writerID, nil, []interface{}{&t.start, &t.end} 112 case *rangeKeySetOp: 113 return &t.writerID, nil, []interface{}{&t.start, &t.end, &t.suffix, &t.value} 114 case *rangeKeyUnsetOp: 115 return &t.writerID, nil, []interface{}{&t.start, &t.end, &t.suffix} 116 } 117 panic(fmt.Sprintf("unsupported op type: %T", op)) 118 } 119 120 var methods = map[string]*methodInfo{ 121 "Apply": makeMethod(applyOp{}, dbTag, batchTag), 122 "Checkpoint": makeMethod(checkpointOp{}, dbTag), 123 "Clone": makeMethod(newIterUsingCloneOp{}, iterTag), 124 "Close": makeMethod(closeOp{}, dbTag, batchTag, iterTag, snapTag), 125 "Commit": makeMethod(batchCommitOp{}, batchTag), 126 "Compact": makeMethod(compactOp{}, dbTag), 127 "Delete": makeMethod(deleteOp{}, dbTag, batchTag), 128 "DeleteRange": makeMethod(deleteRangeOp{}, dbTag, batchTag), 129 "First": makeMethod(iterFirstOp{}, iterTag), 130 "Flush": makeMethod(flushOp{}, dbTag), 131 "Get": makeMethod(getOp{}, dbTag, batchTag, snapTag), 132 "Ingest": makeMethod(ingestOp{}, dbTag), 133 "Init": makeMethod(initOp{}, dbTag), 134 "Last": makeMethod(iterLastOp{}, iterTag), 135 "Merge": makeMethod(mergeOp{}, dbTag, batchTag), 136 "NewBatch": makeMethod(newBatchOp{}, dbTag), 137 "NewIndexedBatch": makeMethod(newIndexedBatchOp{}, dbTag), 138 "NewIter": makeMethod(newIterOp{}, dbTag, batchTag, snapTag), 139 "NewSnapshot": makeMethod(newSnapshotOp{}, dbTag), 140 "Next": makeMethod(iterNextOp{}, iterTag), 141 "NextPrefix": makeMethod(iterNextPrefixOp{}, iterTag), 142 "InternalNext": makeMethod(iterCanSingleDelOp{}, iterTag), 143 "Prev": makeMethod(iterPrevOp{}, iterTag), 144 "RangeKeyDelete": makeMethod(rangeKeyDeleteOp{}, dbTag, batchTag), 145 "RangeKeySet": makeMethod(rangeKeySetOp{}, dbTag, batchTag), 146 "RangeKeyUnset": makeMethod(rangeKeyUnsetOp{}, dbTag, batchTag), 147 "RatchetFormatMajorVersion": makeMethod(dbRatchetFormatMajorVersionOp{}, dbTag), 148 "Restart": makeMethod(dbRestartOp{}, dbTag), 149 "SeekGE": makeMethod(iterSeekGEOp{}, iterTag), 150 "SeekLT": makeMethod(iterSeekLTOp{}, iterTag), 151 "SeekPrefixGE": makeMethod(iterSeekPrefixGEOp{}, iterTag), 152 "Set": makeMethod(setOp{}, dbTag, batchTag), 153 "SetBounds": makeMethod(iterSetBoundsOp{}, iterTag), 154 "SetOptions": makeMethod(iterSetOptionsOp{}, iterTag), 155 "SingleDelete": makeMethod(singleDeleteOp{}, dbTag, batchTag), 156 } 157 158 type parser struct { 159 fset *token.FileSet 160 s scanner.Scanner 161 objs map[objID]bool 162 } 163 164 func parse(src []byte) (_ []op, err error) { 165 // Various bits of magic incantation to set up a scanner for Go compatible 166 // syntax. We arranged for the textual format of ops (e.g. op.String()) to 167 // look like Go which allows us to use the Go scanner for parsing. 168 p := &parser{ 169 fset: token.NewFileSet(), 170 objs: map[objID]bool{makeObjID(dbTag, 0): true}, 171 } 172 file := p.fset.AddFile("", -1, len(src)) 173 p.s.Init(file, src, nil /* no error handler */, 0) 174 return p.parse() 175 } 176 177 func (p *parser) parse() (_ []op, err error) { 178 defer func() { 179 if r := recover(); r != nil { 180 var ok bool 181 if err, ok = r.(error); ok { 182 return 183 } 184 err = errors.Errorf("%v", r) 185 } 186 }() 187 188 var ops []op 189 for { 190 op := p.parseOp() 191 if op == nil { 192 computeDerivedFields(ops) 193 return ops, nil 194 } 195 ops = append(ops, op) 196 } 197 } 198 199 func (p *parser) parseOp() op { 200 destPos, destTok, destLit := p.s.Scan() 201 if destTok == token.EOF { 202 return nil 203 } 204 if destTok != token.IDENT { 205 panic(p.errorf(destPos, "unexpected token: %s %q", destTok, destLit)) 206 } 207 if destLit == "Init" { 208 // <op>(<args>) 209 return p.makeOp(destLit, makeObjID(dbTag, 0), 0, destPos) 210 } 211 212 destID := p.parseObjID(destPos, destLit) 213 214 pos, tok, lit := p.s.Scan() 215 switch tok { 216 case token.PERIOD: 217 // <obj>.<op>(<args>) 218 if !p.objs[destID] { 219 panic(p.errorf(destPos, "unknown object: %s", destID)) 220 } 221 _, methodLit := p.scanToken(token.IDENT) 222 return p.makeOp(methodLit, destID, 0, destPos) 223 224 case token.ASSIGN: 225 // <obj> = <obj>.<op>(<args>) 226 srcPos, srcLit := p.scanToken(token.IDENT) 227 srcID := p.parseObjID(srcPos, srcLit) 228 if !p.objs[srcID] { 229 panic(p.errorf(srcPos, "unknown object %q", srcLit)) 230 } 231 p.scanToken(token.PERIOD) 232 _, methodLit := p.scanToken(token.IDENT) 233 p.objs[destID] = true 234 return p.makeOp(methodLit, srcID, destID, srcPos) 235 } 236 panic(p.errorf(pos, "unexpected token: %q", p.tokenf(tok, lit))) 237 } 238 239 func (p *parser) parseObjID(pos token.Pos, str string) objID { 240 var tag objTag 241 switch { 242 case str == "db": 243 return makeObjID(dbTag, 0) 244 case strings.HasPrefix(str, "batch"): 245 tag, str = batchTag, str[5:] 246 case strings.HasPrefix(str, "iter"): 247 tag, str = iterTag, str[4:] 248 case strings.HasPrefix(str, "snap"): 249 tag, str = snapTag, str[4:] 250 default: 251 panic(p.errorf(pos, "unable to parse objectID: %q", str)) 252 } 253 id, err := strconv.ParseInt(str, 10, 32) 254 if err != nil { 255 panic(p.errorf(pos, "%s", err)) 256 } 257 return makeObjID(tag, uint32(id)) 258 } 259 260 func unquoteBytes(lit string) []byte { 261 s, err := strconv.Unquote(lit) 262 if err != nil { 263 panic(err) 264 } 265 if len(s) == 0 { 266 return nil 267 } 268 return []byte(s) 269 } 270 271 func (p *parser) parseArgs(op op, methodName string, args []interface{}) { 272 pos, _ := p.scanToken(token.LPAREN) 273 for i := range args { 274 if i > 0 { 275 pos, _ = p.scanToken(token.COMMA) 276 } 277 278 switch t := args[i].(type) { 279 case *uint32: 280 _, lit := p.scanToken(token.INT) 281 val, err := strconv.ParseUint(lit, 10, 32) 282 if err != nil { 283 panic(err) 284 } 285 *t = uint32(val) 286 287 case *uint64: 288 _, lit := p.scanToken(token.INT) 289 val, err := strconv.ParseUint(lit, 10, 64) 290 if err != nil { 291 panic(err) 292 } 293 *t = uint64(val) 294 295 case *[]byte: 296 _, lit := p.scanToken(token.STRING) 297 *t = unquoteBytes(lit) 298 299 case *bool: 300 _, lit := p.scanToken(token.IDENT) 301 b, err := strconv.ParseBool(lit) 302 if err != nil { 303 panic(err) 304 } 305 *t = b 306 307 case *objID: 308 pos, lit := p.scanToken(token.IDENT) 309 *t = p.parseObjID(pos, lit) 310 311 case *[]pebble.KeyRange: 312 var pending pebble.KeyRange 313 for { 314 pos, tok, lit := p.s.Scan() 315 switch tok { 316 case token.STRING: 317 x := unquoteBytes(lit) 318 if pending.Start == nil { 319 pending.Start = x 320 } else { 321 pending.End = x 322 *t = append(*t, pending) 323 pending = pebble.KeyRange{} 324 } 325 pos, tok, lit := p.s.Scan() 326 switch tok { 327 case token.COMMA: 328 continue 329 case token.RPAREN: 330 p.scanToken(token.SEMICOLON) 331 return 332 default: 333 panic(p.errorf(pos, "unexpected token: %q", p.tokenf(tok, lit))) 334 } 335 case token.RPAREN: 336 p.scanToken(token.SEMICOLON) 337 return 338 default: 339 panic(p.errorf(pos, "unexpected token: %q", p.tokenf(tok, lit))) 340 } 341 } 342 343 case *[]objID: 344 for { 345 pos, tok, lit := p.s.Scan() 346 switch tok { 347 case token.IDENT: 348 *t = append(*t, p.parseObjID(pos, lit)) 349 pos, tok, lit := p.s.Scan() 350 switch tok { 351 case token.COMMA: 352 continue 353 case token.RPAREN: 354 p.scanToken(token.SEMICOLON) 355 return 356 default: 357 panic(p.errorf(pos, "unexpected token: %q", p.tokenf(tok, lit))) 358 } 359 case token.RPAREN: 360 p.scanToken(token.SEMICOLON) 361 return 362 default: 363 panic(p.errorf(pos, "unexpected token: %q", p.tokenf(tok, lit))) 364 } 365 } 366 367 case *[]pebble.CheckpointSpan: 368 pos, tok, lit := p.s.Scan() 369 switch tok { 370 case token.RPAREN: 371 // No spans. 372 *t = nil 373 p.scanToken(token.SEMICOLON) 374 return 375 376 case token.STRING: 377 var keys [][]byte 378 for { 379 s, err := strconv.Unquote(lit) 380 if err != nil { 381 panic(p.errorf(pos, "unquoting %q: %v", lit, err)) 382 } 383 keys = append(keys, []byte(s)) 384 385 pos, tok, lit = p.s.Scan() 386 switch tok { 387 case token.COMMA: 388 pos, tok, lit = p.s.Scan() 389 if tok != token.STRING { 390 panic(p.errorf(pos, "unexpected token: %q", p.tokenf(tok, lit))) 391 } 392 continue 393 394 case token.RPAREN: 395 p.scanToken(token.SEMICOLON) 396 if len(keys)%2 == 1 { 397 panic(p.errorf(pos, "expected even number of keys")) 398 } 399 *t = make([]pebble.CheckpointSpan, len(keys)/2) 400 for i := range *t { 401 (*t)[i] = pebble.CheckpointSpan{ 402 Start: keys[i*2], 403 End: keys[i*2+1], 404 } 405 } 406 return 407 408 default: 409 panic(p.errorf(pos, "unexpected token: %q", p.tokenf(tok, lit))) 410 } 411 } 412 413 default: 414 panic(p.errorf(pos, "unexpected token: %q", p.tokenf(tok, lit))) 415 } 416 417 case *pebble.FormatMajorVersion: 418 _, lit := p.scanToken(token.INT) 419 val, err := strconv.ParseUint(lit, 10, 64) 420 if err != nil { 421 panic(err) 422 } 423 *t = pebble.FormatMajorVersion(val) 424 425 default: 426 panic(p.errorf(pos, "%s: unsupported arg[%d] type: %T", methodName, i, args[i])) 427 } 428 } 429 p.scanToken(token.RPAREN) 430 p.scanToken(token.SEMICOLON) 431 } 432 433 func (p *parser) scanToken(expected token.Token) (pos token.Pos, lit string) { 434 pos, tok, lit := p.s.Scan() 435 if tok != expected { 436 panic(p.errorf(pos, "unexpected token: %q", p.tokenf(tok, lit))) 437 } 438 return pos, lit 439 } 440 441 func (p *parser) makeOp(methodName string, receiverID, targetID objID, pos token.Pos) op { 442 info := methods[methodName] 443 if info == nil { 444 panic(p.errorf(pos, "unknown op %s.%s", receiverID, methodName)) 445 } 446 if info.validTags&(1<<receiverID.tag()) == 0 { 447 panic(p.errorf(pos, "%s.%s: %s is not a method on %s", 448 receiverID, methodName, methodName, receiverID)) 449 } 450 451 op := info.constructor() 452 receiver, target, args := opArgs(op) 453 454 // The form of an operation is: 455 // [target =] receiver.method(args) 456 // 457 // The receiver is the object the operation will be called on, which can be 458 // any valid ID. Certain operations such as Ingest are only valid on the DB 459 // object. That is indicated by opArgs returning a nil receiver. 460 if receiver != nil { 461 *receiver = receiverID 462 } else if receiverID.tag() != dbTag { 463 panic(p.errorf(pos, "unknown op %s.%s", receiverID, methodName)) 464 } 465 466 // The target is the object that will be assigned the result of an object 467 // creation operation such as newBatchOp or newIterOp. 468 if target != nil { 469 // It is invalid to not have a targetID for a method which generates a new 470 // object. 471 if targetID == 0 { 472 panic(p.errorf(pos, "assignment expected for %s.%s", receiverID, methodName)) 473 } 474 // It is invalid to try to assign to the DB object. 475 if targetID.tag() == dbTag { 476 panic(p.errorf(pos, "cannot use %s as target of assignment", targetID)) 477 } 478 *target = targetID 479 } else if targetID != 0 { 480 panic(p.errorf(pos, "cannot use %s.%s in assignment", receiverID, methodName)) 481 } 482 483 p.parseArgs(op, methodName, args) 484 return op 485 } 486 487 func (p *parser) tokenf(tok token.Token, lit string) string { 488 if tok.IsLiteral() { 489 return lit 490 } 491 return tok.String() 492 } 493 494 func (p *parser) errorf(pos token.Pos, format string, args ...interface{}) error { 495 return errors.New(p.fset.Position(pos).String() + ": " + fmt.Sprintf(format, args...)) 496 } 497 498 // computeDerivedFields makes one pass through the provided operations, filling 499 // any derived fields. This pass must happen before execution because concurrent 500 // execution depends on these fields. 501 func computeDerivedFields(ops []op) { 502 iterToReader := make(map[objID]objID) 503 for i := range ops { 504 switch v := ops[i].(type) { 505 case *newIterOp: 506 iterToReader[v.iterID] = v.readerID 507 case *newIterUsingCloneOp: 508 v.derivedReaderID = iterToReader[v.existingIterID] 509 iterToReader[v.iterID] = v.derivedReaderID 510 case *iterSetOptionsOp: 511 v.derivedReaderID = iterToReader[v.iterID] 512 case *iterFirstOp: 513 v.derivedReaderID = iterToReader[v.iterID] 514 case *iterLastOp: 515 v.derivedReaderID = iterToReader[v.iterID] 516 case *iterSeekGEOp: 517 v.derivedReaderID = iterToReader[v.iterID] 518 case *iterSeekPrefixGEOp: 519 v.derivedReaderID = iterToReader[v.iterID] 520 case *iterSeekLTOp: 521 v.derivedReaderID = iterToReader[v.iterID] 522 case *iterNextOp: 523 v.derivedReaderID = iterToReader[v.iterID] 524 case *iterNextPrefixOp: 525 v.derivedReaderID = iterToReader[v.iterID] 526 case *iterCanSingleDelOp: 527 v.derivedReaderID = iterToReader[v.iterID] 528 case *iterPrevOp: 529 v.derivedReaderID = iterToReader[v.iterID] 530 } 531 } 532 }