github.com/cockroachdb/pebble@v1.1.1-0.20240513155919-3622ade60459/metamorphic/parser.go (about)

     1  // Copyright 2019 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package metamorphic
     6  
     7  import (
     8  	"fmt"
     9  	"go/scanner"
    10  	"go/token"
    11  	"reflect"
    12  	"strconv"
    13  	"strings"
    14  
    15  	"github.com/cockroachdb/errors"
    16  	"github.com/cockroachdb/pebble"
    17  )
    18  
// methodInfo describes one parseable method: how to build its op and which
// kinds of objects it may be invoked on.
type methodInfo struct {
	// constructor returns a freshly allocated op for the method. makeMethod
	// sets it to a closure that allocates a new value of the op's type via
	// reflection.
	constructor func() op
	// validTags is a bitmask with bit (1 << tag) set for every objTag the
	// method accepts as a receiver; makeOp tests the receiver's tag against it.
	validTags   uint32
}
    23  
    24  func makeMethod(i interface{}, tags ...objTag) *methodInfo {
    25  	var validTags uint32
    26  	for _, tag := range tags {
    27  		validTags |= 1 << tag
    28  	}
    29  
    30  	t := reflect.TypeOf(i)
    31  	return &methodInfo{
    32  		constructor: func() op {
    33  			return reflect.New(t).Interface().(op)
    34  		},
    35  		validTags: validTags,
    36  	}
    37  }
    38  
    39  // args returns the receiverID, targetID and arguments for the op. The
    40  // receiverID is the ID of the object the op will be applied to. The targetID
    41  // is the ID of the object for assignment. If the method does not return a new
    42  // object, then targetID will be nil. The argument list is just what it sounds
    43  // like: the list of arguments for the operation.
    44  func opArgs(op op) (receiverID *objID, targetID *objID, args []interface{}) {
    45  	switch t := op.(type) {
    46  	case *applyOp:
    47  		return &t.writerID, nil, []interface{}{&t.batchID}
    48  	case *checkpointOp:
    49  		return nil, nil, []interface{}{&t.spans}
    50  	case *closeOp:
    51  		return &t.objID, nil, nil
    52  	case *compactOp:
    53  		return nil, nil, []interface{}{&t.start, &t.end, &t.parallelize}
    54  	case *batchCommitOp:
    55  		return &t.batchID, nil, nil
    56  	case *dbRatchetFormatMajorVersionOp:
    57  		return nil, nil, []interface{}{&t.vers}
    58  	case *dbRestartOp:
    59  		return nil, nil, nil
    60  	case *deleteOp:
    61  		return &t.writerID, nil, []interface{}{&t.key}
    62  	case *deleteRangeOp:
    63  		return &t.writerID, nil, []interface{}{&t.start, &t.end}
    64  	case *iterFirstOp:
    65  		return &t.iterID, nil, nil
    66  	case *flushOp:
    67  		return nil, nil, nil
    68  	case *getOp:
    69  		return &t.readerID, nil, []interface{}{&t.key}
    70  	case *ingestOp:
    71  		return nil, nil, []interface{}{&t.batchIDs}
    72  	case *initOp:
    73  		return nil, nil, []interface{}{&t.batchSlots, &t.iterSlots, &t.snapshotSlots}
    74  	case *iterLastOp:
    75  		return &t.iterID, nil, nil
    76  	case *mergeOp:
    77  		return &t.writerID, nil, []interface{}{&t.key, &t.value}
    78  	case *newBatchOp:
    79  		return nil, &t.batchID, nil
    80  	case *newIndexedBatchOp:
    81  		return nil, &t.batchID, nil
    82  	case *newIterOp:
    83  		return &t.readerID, &t.iterID, []interface{}{&t.lower, &t.upper, &t.keyTypes, &t.filterMin, &t.filterMax, &t.useL6Filters, &t.maskSuffix}
    84  	case *newIterUsingCloneOp:
    85  		return &t.existingIterID, &t.iterID, []interface{}{&t.refreshBatch, &t.lower, &t.upper, &t.keyTypes, &t.filterMin, &t.filterMax, &t.useL6Filters, &t.maskSuffix}
    86  	case *newSnapshotOp:
    87  		return nil, &t.snapID, []interface{}{&t.bounds}
    88  	case *iterNextOp:
    89  		return &t.iterID, nil, []interface{}{&t.limit}
    90  	case *iterNextPrefixOp:
    91  		return &t.iterID, nil, nil
    92  	case *iterCanSingleDelOp:
    93  		return &t.iterID, nil, []interface{}{}
    94  	case *iterPrevOp:
    95  		return &t.iterID, nil, []interface{}{&t.limit}
    96  	case *iterSeekLTOp:
    97  		return &t.iterID, nil, []interface{}{&t.key, &t.limit}
    98  	case *iterSeekGEOp:
    99  		return &t.iterID, nil, []interface{}{&t.key, &t.limit}
   100  	case *iterSeekPrefixGEOp:
   101  		return &t.iterID, nil, []interface{}{&t.key}
   102  	case *setOp:
   103  		return &t.writerID, nil, []interface{}{&t.key, &t.value}
   104  	case *iterSetBoundsOp:
   105  		return &t.iterID, nil, []interface{}{&t.lower, &t.upper}
   106  	case *iterSetOptionsOp:
   107  		return &t.iterID, nil, []interface{}{&t.lower, &t.upper, &t.keyTypes, &t.filterMin, &t.filterMax, &t.useL6Filters, &t.maskSuffix}
   108  	case *singleDeleteOp:
   109  		return &t.writerID, nil, []interface{}{&t.key, &t.maybeReplaceDelete}
   110  	case *rangeKeyDeleteOp:
   111  		return &t.writerID, nil, []interface{}{&t.start, &t.end}
   112  	case *rangeKeySetOp:
   113  		return &t.writerID, nil, []interface{}{&t.start, &t.end, &t.suffix, &t.value}
   114  	case *rangeKeyUnsetOp:
   115  		return &t.writerID, nil, []interface{}{&t.start, &t.end, &t.suffix}
   116  	}
   117  	panic(fmt.Sprintf("unsupported op type: %T", op))
   118  }
   119  
// methods maps a method name, as written in the textual form of an op, to the
// methodInfo used to construct that op and to validate the receiver's tag.
// makeOp looks names up here and reports an "unknown op" error for names that
// are absent.
var methods = map[string]*methodInfo{
	"Apply":                     makeMethod(applyOp{}, dbTag, batchTag),
	"Checkpoint":                makeMethod(checkpointOp{}, dbTag),
	"Clone":                     makeMethod(newIterUsingCloneOp{}, iterTag),
	"Close":                     makeMethod(closeOp{}, dbTag, batchTag, iterTag, snapTag),
	"Commit":                    makeMethod(batchCommitOp{}, batchTag),
	"Compact":                   makeMethod(compactOp{}, dbTag),
	"Delete":                    makeMethod(deleteOp{}, dbTag, batchTag),
	"DeleteRange":               makeMethod(deleteRangeOp{}, dbTag, batchTag),
	"First":                     makeMethod(iterFirstOp{}, iterTag),
	"Flush":                     makeMethod(flushOp{}, dbTag),
	"Get":                       makeMethod(getOp{}, dbTag, batchTag, snapTag),
	"Ingest":                    makeMethod(ingestOp{}, dbTag),
	"Init":                      makeMethod(initOp{}, dbTag),
	"Last":                      makeMethod(iterLastOp{}, iterTag),
	"Merge":                     makeMethod(mergeOp{}, dbTag, batchTag),
	"NewBatch":                  makeMethod(newBatchOp{}, dbTag),
	"NewIndexedBatch":           makeMethod(newIndexedBatchOp{}, dbTag),
	"NewIter":                   makeMethod(newIterOp{}, dbTag, batchTag, snapTag),
	"NewSnapshot":               makeMethod(newSnapshotOp{}, dbTag),
	"Next":                      makeMethod(iterNextOp{}, iterTag),
	"NextPrefix":                makeMethod(iterNextPrefixOp{}, iterTag),
	"InternalNext":              makeMethod(iterCanSingleDelOp{}, iterTag),
	"Prev":                      makeMethod(iterPrevOp{}, iterTag),
	"RangeKeyDelete":            makeMethod(rangeKeyDeleteOp{}, dbTag, batchTag),
	"RangeKeySet":               makeMethod(rangeKeySetOp{}, dbTag, batchTag),
	"RangeKeyUnset":             makeMethod(rangeKeyUnsetOp{}, dbTag, batchTag),
	"RatchetFormatMajorVersion": makeMethod(dbRatchetFormatMajorVersionOp{}, dbTag),
	"Restart":                   makeMethod(dbRestartOp{}, dbTag),
	"SeekGE":                    makeMethod(iterSeekGEOp{}, iterTag),
	"SeekLT":                    makeMethod(iterSeekLTOp{}, iterTag),
	"SeekPrefixGE":              makeMethod(iterSeekPrefixGEOp{}, iterTag),
	"Set":                       makeMethod(setOp{}, dbTag, batchTag),
	"SetBounds":                 makeMethod(iterSetBoundsOp{}, iterTag),
	"SetOptions":                makeMethod(iterSetOptionsOp{}, iterTag),
	"SingleDelete":              makeMethod(singleDeleteOp{}, dbTag, batchTag),
}
   157  
// parser holds the state needed to turn the textual form of a sequence of ops
// back into op values.
type parser struct {
	// fset resolves token positions into "line:column" text for error
	// messages (see errorf).
	fset *token.FileSet
	// s is the Go scanner used to tokenize the input.
	s    scanner.Scanner
	// objs records the object IDs that may be used as a receiver. It is seeded
	// with the DB object by parse and grows as parseOp encounters assignments.
	objs map[objID]bool
}
   163  
   164  func parse(src []byte) (_ []op, err error) {
   165  	// Various bits of magic incantation to set up a scanner for Go compatible
   166  	// syntax. We arranged for the textual format of ops (e.g. op.String()) to
   167  	// look like Go which allows us to use the Go scanner for parsing.
   168  	p := &parser{
   169  		fset: token.NewFileSet(),
   170  		objs: map[objID]bool{makeObjID(dbTag, 0): true},
   171  	}
   172  	file := p.fset.AddFile("", -1, len(src))
   173  	p.s.Init(file, src, nil /* no error handler */, 0)
   174  	return p.parse()
   175  }
   176  
   177  func (p *parser) parse() (_ []op, err error) {
   178  	defer func() {
   179  		if r := recover(); r != nil {
   180  			var ok bool
   181  			if err, ok = r.(error); ok {
   182  				return
   183  			}
   184  			err = errors.Errorf("%v", r)
   185  		}
   186  	}()
   187  
   188  	var ops []op
   189  	for {
   190  		op := p.parseOp()
   191  		if op == nil {
   192  			computeDerivedFields(ops)
   193  			return ops, nil
   194  		}
   195  		ops = append(ops, op)
   196  	}
   197  }
   198  
   199  func (p *parser) parseOp() op {
   200  	destPos, destTok, destLit := p.s.Scan()
   201  	if destTok == token.EOF {
   202  		return nil
   203  	}
   204  	if destTok != token.IDENT {
   205  		panic(p.errorf(destPos, "unexpected token: %s %q", destTok, destLit))
   206  	}
   207  	if destLit == "Init" {
   208  		// <op>(<args>)
   209  		return p.makeOp(destLit, makeObjID(dbTag, 0), 0, destPos)
   210  	}
   211  
   212  	destID := p.parseObjID(destPos, destLit)
   213  
   214  	pos, tok, lit := p.s.Scan()
   215  	switch tok {
   216  	case token.PERIOD:
   217  		// <obj>.<op>(<args>)
   218  		if !p.objs[destID] {
   219  			panic(p.errorf(destPos, "unknown object: %s", destID))
   220  		}
   221  		_, methodLit := p.scanToken(token.IDENT)
   222  		return p.makeOp(methodLit, destID, 0, destPos)
   223  
   224  	case token.ASSIGN:
   225  		// <obj> = <obj>.<op>(<args>)
   226  		srcPos, srcLit := p.scanToken(token.IDENT)
   227  		srcID := p.parseObjID(srcPos, srcLit)
   228  		if !p.objs[srcID] {
   229  			panic(p.errorf(srcPos, "unknown object %q", srcLit))
   230  		}
   231  		p.scanToken(token.PERIOD)
   232  		_, methodLit := p.scanToken(token.IDENT)
   233  		p.objs[destID] = true
   234  		return p.makeOp(methodLit, srcID, destID, srcPos)
   235  	}
   236  	panic(p.errorf(pos, "unexpected token: %q", p.tokenf(tok, lit)))
   237  }
   238  
   239  func (p *parser) parseObjID(pos token.Pos, str string) objID {
   240  	var tag objTag
   241  	switch {
   242  	case str == "db":
   243  		return makeObjID(dbTag, 0)
   244  	case strings.HasPrefix(str, "batch"):
   245  		tag, str = batchTag, str[5:]
   246  	case strings.HasPrefix(str, "iter"):
   247  		tag, str = iterTag, str[4:]
   248  	case strings.HasPrefix(str, "snap"):
   249  		tag, str = snapTag, str[4:]
   250  	default:
   251  		panic(p.errorf(pos, "unable to parse objectID: %q", str))
   252  	}
   253  	id, err := strconv.ParseInt(str, 10, 32)
   254  	if err != nil {
   255  		panic(p.errorf(pos, "%s", err))
   256  	}
   257  	return makeObjID(tag, uint32(id))
   258  }
   259  
   260  func unquoteBytes(lit string) []byte {
   261  	s, err := strconv.Unquote(lit)
   262  	if err != nil {
   263  		panic(err)
   264  	}
   265  	if len(s) == 0 {
   266  		return nil
   267  	}
   268  	return []byte(s)
   269  }
   270  
   271  func (p *parser) parseArgs(op op, methodName string, args []interface{}) {
   272  	pos, _ := p.scanToken(token.LPAREN)
   273  	for i := range args {
   274  		if i > 0 {
   275  			pos, _ = p.scanToken(token.COMMA)
   276  		}
   277  
   278  		switch t := args[i].(type) {
   279  		case *uint32:
   280  			_, lit := p.scanToken(token.INT)
   281  			val, err := strconv.ParseUint(lit, 10, 32)
   282  			if err != nil {
   283  				panic(err)
   284  			}
   285  			*t = uint32(val)
   286  
   287  		case *uint64:
   288  			_, lit := p.scanToken(token.INT)
   289  			val, err := strconv.ParseUint(lit, 10, 64)
   290  			if err != nil {
   291  				panic(err)
   292  			}
   293  			*t = uint64(val)
   294  
   295  		case *[]byte:
   296  			_, lit := p.scanToken(token.STRING)
   297  			*t = unquoteBytes(lit)
   298  
   299  		case *bool:
   300  			_, lit := p.scanToken(token.IDENT)
   301  			b, err := strconv.ParseBool(lit)
   302  			if err != nil {
   303  				panic(err)
   304  			}
   305  			*t = b
   306  
   307  		case *objID:
   308  			pos, lit := p.scanToken(token.IDENT)
   309  			*t = p.parseObjID(pos, lit)
   310  
   311  		case *[]pebble.KeyRange:
   312  			var pending pebble.KeyRange
   313  			for {
   314  				pos, tok, lit := p.s.Scan()
   315  				switch tok {
   316  				case token.STRING:
   317  					x := unquoteBytes(lit)
   318  					if pending.Start == nil {
   319  						pending.Start = x
   320  					} else {
   321  						pending.End = x
   322  						*t = append(*t, pending)
   323  						pending = pebble.KeyRange{}
   324  					}
   325  					pos, tok, lit := p.s.Scan()
   326  					switch tok {
   327  					case token.COMMA:
   328  						continue
   329  					case token.RPAREN:
   330  						p.scanToken(token.SEMICOLON)
   331  						return
   332  					default:
   333  						panic(p.errorf(pos, "unexpected token: %q", p.tokenf(tok, lit)))
   334  					}
   335  				case token.RPAREN:
   336  					p.scanToken(token.SEMICOLON)
   337  					return
   338  				default:
   339  					panic(p.errorf(pos, "unexpected token: %q", p.tokenf(tok, lit)))
   340  				}
   341  			}
   342  
   343  		case *[]objID:
   344  			for {
   345  				pos, tok, lit := p.s.Scan()
   346  				switch tok {
   347  				case token.IDENT:
   348  					*t = append(*t, p.parseObjID(pos, lit))
   349  					pos, tok, lit := p.s.Scan()
   350  					switch tok {
   351  					case token.COMMA:
   352  						continue
   353  					case token.RPAREN:
   354  						p.scanToken(token.SEMICOLON)
   355  						return
   356  					default:
   357  						panic(p.errorf(pos, "unexpected token: %q", p.tokenf(tok, lit)))
   358  					}
   359  				case token.RPAREN:
   360  					p.scanToken(token.SEMICOLON)
   361  					return
   362  				default:
   363  					panic(p.errorf(pos, "unexpected token: %q", p.tokenf(tok, lit)))
   364  				}
   365  			}
   366  
   367  		case *[]pebble.CheckpointSpan:
   368  			pos, tok, lit := p.s.Scan()
   369  			switch tok {
   370  			case token.RPAREN:
   371  				// No spans.
   372  				*t = nil
   373  				p.scanToken(token.SEMICOLON)
   374  				return
   375  
   376  			case token.STRING:
   377  				var keys [][]byte
   378  				for {
   379  					s, err := strconv.Unquote(lit)
   380  					if err != nil {
   381  						panic(p.errorf(pos, "unquoting %q: %v", lit, err))
   382  					}
   383  					keys = append(keys, []byte(s))
   384  
   385  					pos, tok, lit = p.s.Scan()
   386  					switch tok {
   387  					case token.COMMA:
   388  						pos, tok, lit = p.s.Scan()
   389  						if tok != token.STRING {
   390  							panic(p.errorf(pos, "unexpected token: %q", p.tokenf(tok, lit)))
   391  						}
   392  						continue
   393  
   394  					case token.RPAREN:
   395  						p.scanToken(token.SEMICOLON)
   396  						if len(keys)%2 == 1 {
   397  							panic(p.errorf(pos, "expected even number of keys"))
   398  						}
   399  						*t = make([]pebble.CheckpointSpan, len(keys)/2)
   400  						for i := range *t {
   401  							(*t)[i] = pebble.CheckpointSpan{
   402  								Start: keys[i*2],
   403  								End:   keys[i*2+1],
   404  							}
   405  						}
   406  						return
   407  
   408  					default:
   409  						panic(p.errorf(pos, "unexpected token: %q", p.tokenf(tok, lit)))
   410  					}
   411  				}
   412  
   413  			default:
   414  				panic(p.errorf(pos, "unexpected token: %q", p.tokenf(tok, lit)))
   415  			}
   416  
   417  		case *pebble.FormatMajorVersion:
   418  			_, lit := p.scanToken(token.INT)
   419  			val, err := strconv.ParseUint(lit, 10, 64)
   420  			if err != nil {
   421  				panic(err)
   422  			}
   423  			*t = pebble.FormatMajorVersion(val)
   424  
   425  		default:
   426  			panic(p.errorf(pos, "%s: unsupported arg[%d] type: %T", methodName, i, args[i]))
   427  		}
   428  	}
   429  	p.scanToken(token.RPAREN)
   430  	p.scanToken(token.SEMICOLON)
   431  }
   432  
   433  func (p *parser) scanToken(expected token.Token) (pos token.Pos, lit string) {
   434  	pos, tok, lit := p.s.Scan()
   435  	if tok != expected {
   436  		panic(p.errorf(pos, "unexpected token: %q", p.tokenf(tok, lit)))
   437  	}
   438  	return pos, lit
   439  }
   440  
   441  func (p *parser) makeOp(methodName string, receiverID, targetID objID, pos token.Pos) op {
   442  	info := methods[methodName]
   443  	if info == nil {
   444  		panic(p.errorf(pos, "unknown op %s.%s", receiverID, methodName))
   445  	}
   446  	if info.validTags&(1<<receiverID.tag()) == 0 {
   447  		panic(p.errorf(pos, "%s.%s: %s is not a method on %s",
   448  			receiverID, methodName, methodName, receiverID))
   449  	}
   450  
   451  	op := info.constructor()
   452  	receiver, target, args := opArgs(op)
   453  
   454  	// The form of an operation is:
   455  	//   [target =] receiver.method(args)
   456  	//
   457  	// The receiver is the object the operation will be called on, which can be
   458  	// any valid ID. Certain operations such as Ingest are only valid on the DB
   459  	// object. That is indicated by opArgs returning a nil receiver.
   460  	if receiver != nil {
   461  		*receiver = receiverID
   462  	} else if receiverID.tag() != dbTag {
   463  		panic(p.errorf(pos, "unknown op %s.%s", receiverID, methodName))
   464  	}
   465  
   466  	// The target is the object that will be assigned the result of an object
   467  	// creation operation such as newBatchOp or newIterOp.
   468  	if target != nil {
   469  		// It is invalid to not have a targetID for a method which generates a new
   470  		// object.
   471  		if targetID == 0 {
   472  			panic(p.errorf(pos, "assignment expected for %s.%s", receiverID, methodName))
   473  		}
   474  		// It is invalid to try to assign to the DB object.
   475  		if targetID.tag() == dbTag {
   476  			panic(p.errorf(pos, "cannot use %s as target of assignment", targetID))
   477  		}
   478  		*target = targetID
   479  	} else if targetID != 0 {
   480  		panic(p.errorf(pos, "cannot use %s.%s in assignment", receiverID, methodName))
   481  	}
   482  
   483  	p.parseArgs(op, methodName, args)
   484  	return op
   485  }
   486  
   487  func (p *parser) tokenf(tok token.Token, lit string) string {
   488  	if tok.IsLiteral() {
   489  		return lit
   490  	}
   491  	return tok.String()
   492  }
   493  
   494  func (p *parser) errorf(pos token.Pos, format string, args ...interface{}) error {
   495  	return errors.New(p.fset.Position(pos).String() + ": " + fmt.Sprintf(format, args...))
   496  }
   497  
   498  // computeDerivedFields makes one pass through the provided operations, filling
   499  // any derived fields. This pass must happen before execution because concurrent
   500  // execution depends on these fields.
   501  func computeDerivedFields(ops []op) {
   502  	iterToReader := make(map[objID]objID)
   503  	for i := range ops {
   504  		switch v := ops[i].(type) {
   505  		case *newIterOp:
   506  			iterToReader[v.iterID] = v.readerID
   507  		case *newIterUsingCloneOp:
   508  			v.derivedReaderID = iterToReader[v.existingIterID]
   509  			iterToReader[v.iterID] = v.derivedReaderID
   510  		case *iterSetOptionsOp:
   511  			v.derivedReaderID = iterToReader[v.iterID]
   512  		case *iterFirstOp:
   513  			v.derivedReaderID = iterToReader[v.iterID]
   514  		case *iterLastOp:
   515  			v.derivedReaderID = iterToReader[v.iterID]
   516  		case *iterSeekGEOp:
   517  			v.derivedReaderID = iterToReader[v.iterID]
   518  		case *iterSeekPrefixGEOp:
   519  			v.derivedReaderID = iterToReader[v.iterID]
   520  		case *iterSeekLTOp:
   521  			v.derivedReaderID = iterToReader[v.iterID]
   522  		case *iterNextOp:
   523  			v.derivedReaderID = iterToReader[v.iterID]
   524  		case *iterNextPrefixOp:
   525  			v.derivedReaderID = iterToReader[v.iterID]
   526  		case *iterCanSingleDelOp:
   527  			v.derivedReaderID = iterToReader[v.iterID]
   528  		case *iterPrevOp:
   529  			v.derivedReaderID = iterToReader[v.iterID]
   530  		}
   531  	}
   532  }