github.com/cockroachdb/pebble@v1.1.1-0.20240513155919-3622ade60459/internal/dsl/dsl.go (about)

     1  // Copyright 2023 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  // Package dsl provides facilities for parsing lisp-like domain-specific
     6  // languages (DSL).
     7  package dsl
     8  
     9  import (
    10  	"fmt"
    11  	"go/scanner"
    12  	"go/token"
    13  	"strconv"
    14  	"strings"
    15  
    16  	"github.com/cockroachdb/errors"
    17  )
    18  
    19  // NewParser constructs a new Parser of a lisp-like DSL.
    20  func NewParser[T any]() *Parser[T] {
    21  	p := new(Parser[T])
    22  	p.constants = make(map[string]func() T)
    23  	p.funcs = make(map[string]func(*Parser[T], *Scanner) T)
    24  	return p
    25  }
    26  
    27  // NewPredicateParser constructs a new Parser of a Lisp-like DSL, where the
    28  // resulting type implements Predicate[E]. NewPredicateParser predefines a few
    29  // useful functions: Not, And, Or, OnIndex.
    30  func NewPredicateParser[E any]() *Parser[Predicate[E]] {
    31  	p := NewParser[Predicate[E]]()
    32  	p.DefineFunc("Not", parseNot[E])
    33  	p.DefineFunc("And", parseAnd[E])
    34  	p.DefineFunc("Or", parseOr[E])
    35  	p.DefineFunc("OnIndex", parseOnIndex[E])
    36  	return p
    37  }
    38  
// A Parser holds the rules and logic for parsing a DSL whose expressions
// evaluate to values of type T.
//
// The constants field maps a bare identifier to the closure that instantiates
// its value. The funcs field maps the identifier that follows an opening
// paren to the closure that parses the rest of that invocation; the closure
// receives the Parser itself and the Scanner positioned after the identifier.
type Parser[T any] struct {
	constants map[string]func() T
	funcs     map[string]func(*Parser[T], *Scanner) T
}
    44  
    45  // DefineConstant adds a new constant to the Parser's supported DSL. Whenever
    46  // the provided identifier is used within a constant context, the provided
    47  // closure is invoked to instantiate an appropriate AST value.
    48  func (p *Parser[T]) DefineConstant(identifier string, instantiate func() T) {
    49  	p.constants[identifier] = instantiate
    50  }
    51  
    52  // DefineFunc adds a new func to the Parser's supported DSL. Whenever the
    53  // provided identifier is used within a function invocation context, the
    54  // provided closure is invoked to instantiate an appropriate AST value.
    55  func (p *Parser[T]) DefineFunc(identifier string, parseFunc func(*Parser[T], *Scanner) T) {
    56  	p.funcs[identifier] = parseFunc
    57  }
    58  
    59  // Parse parses the provided input string.
    60  func (p *Parser[T]) Parse(d string) (ret T, err error) {
    61  	defer func() {
    62  		if r := recover(); r != nil {
    63  			var ok bool
    64  			err, ok = r.(error)
    65  			if !ok {
    66  				panic(r)
    67  			}
    68  		}
    69  	}()
    70  
    71  	fset := token.NewFileSet()
    72  	file := fset.AddFile("", -1, len(d))
    73  	var s Scanner
    74  	s.Init(file, []byte(strings.TrimSpace(d)), nil /* no error handler */, 0)
    75  	tok := s.Scan()
    76  	ret = p.ParseFromPos(&s, tok)
    77  	tok = s.Scan()
    78  	if tok.Kind == token.SEMICOLON {
    79  		tok = s.Scan()
    80  	}
    81  	assertTok(tok, token.EOF)
    82  	return ret, err
    83  }
    84  
    85  // ParseFromPos parses from the provided current position and associated
    86  // scanner. If the parser fails to parse, it panics. This function is intended
    87  // to be used when composing Parsers of various types.
    88  func (p *Parser[T]) ParseFromPos(s *Scanner, tok Token) T {
    89  	switch tok.Kind {
    90  	case token.IDENT:
    91  		// A constant without any parens, eg. `Reads`.
    92  		p, ok := p.constants[tok.Lit]
    93  		if !ok {
    94  			panic(errors.Errorf("dsl: unknown constant %q", tok.Lit))
    95  		}
    96  		return p()
    97  	case token.LPAREN:
    98  		// Otherwise it's an expression, eg: (OnIndex 1)
    99  		tok = s.Consume(token.IDENT)
   100  		fp, ok := p.funcs[tok.Lit]
   101  		if !ok {
   102  			panic(errors.Errorf("dsl: unknown func %q", tok.Lit))
   103  		}
   104  		return fp(p, s)
   105  	default:
   106  		panic(errors.Errorf("dsl: unexpected token %s; expected IDENT or LPAREN", tok.String()))
   107  	}
   108  }
   109  
// A Scanner holds the scanner's internal state while processing a given text.
// It embeds the standard library's go/scanner.Scanner, so that type's methods
// (such as Init) are available on a Scanner as well.
type Scanner struct {
	scanner.Scanner
}
   114  
   115  // Scan scans the next token and returns it.
   116  func (s *Scanner) Scan() Token {
   117  	pos, tok, lit := s.Scanner.Scan()
   118  	return Token{pos, tok, lit}
   119  }
   120  
   121  // Consume scans the next token. If the token is not of the provided token, it
   122  // panics. It returns the token itself.
   123  func (s *Scanner) Consume(expect token.Token) Token {
   124  	t := s.Scan()
   125  	assertTok(t, expect)
   126  	return t
   127  }
   128  
   129  // ConsumeString scans the next token. It panics if the next token is not a
   130  // string, or if unable to unquote the string. It returns the unquoted string
   131  // contents.
   132  func (s *Scanner) ConsumeString() string {
   133  	lit := s.Consume(token.STRING).Lit
   134  	str, err := strconv.Unquote(lit)
   135  	if err != nil {
   136  		panic(errors.Newf("dsl: unquoting %q: %v", lit, err))
   137  	}
   138  	return str
   139  }
   140  
   141  // Token is a lexical token scanned from an input text.
   142  type Token struct {
   143  	pos  token.Pos
   144  	Kind token.Token
   145  	Lit  string
   146  }
   147  
   148  // String implements fmt.Stringer.
   149  func (t *Token) String() string {
   150  	if t.Lit != "" {
   151  		return fmt.Sprintf("(%s, %q) at pos %v", t.Kind, t.Lit, t.pos)
   152  	}
   153  	return fmt.Sprintf("%s at pos %v", t.Kind, t.pos)
   154  }
   155  
   156  func assertTok(tok Token, expect token.Token) {
   157  	if tok.Kind != expect {
   158  		panic(errors.Errorf("dsl: unexpected token %s; expected %s", tok.String(), expect))
   159  	}
   160  }