github.com/keybase/client/go@v0.0.0-20241007131713-f10651d043c8/libkb/assertion_parser.go (about)

     1  // Copyright 2015 Keybase, Inc. All rights reserved. Use of
     2  // this source code is governed by the included BSD license.
     3  
     4  package libkb
     5  
import (
	"bytes"
	"fmt"
	"regexp"
	"strings"
)
    11  
    12  const (
    13  	NONE = iota
    14  	OR
    15  	AND
    16  	LPAREN
    17  	RPAREN
    18  	URL
    19  	EOF
    20  	ERROR
    21  )
    22  
    23  type Token struct {
    24  	Typ   int
    25  	value []byte
    26  }
    27  
    28  func (t Token) getString() string {
    29  	return string(t.value)
    30  }
    31  
    32  func (t Token) unexpectedError() error {
    33  	switch t.Typ {
    34  	case EOF:
    35  		return NewAssertionParseError("Unexpected EOF parsing assertion")
    36  	case ERROR:
    37  		// Nothing was matched when arrived at t.value.
    38  		return NewAssertionParseError("Syntax error when parsing: %v", t.getString())
    39  	default:
    40  		return NewAssertionParseError("Unexpected token: %s", t.getString())
    41  	}
    42  }
    43  
    44  func byteArrayEq(a1, a2 []byte) bool {
    45  	if len(a1) != len(a2) {
    46  		return false
    47  	}
    48  	for i, c := range a1 {
    49  		if c != a2[i] {
    50  			return false
    51  		}
    52  	}
    53  	return true
    54  }
    55  
    56  func (t Token) Eq(t2 Token) bool {
    57  	return (t.Typ == t2.Typ) && byteArrayEq(t.value, t2.value)
    58  }
    59  
    60  func NewToken(typ int) Token {
    61  	return Token{Typ: typ}
    62  }
    63  
    64  type Lexer struct {
    65  	buffer  []byte
    66  	last    Token
    67  	putback bool
    68  }
    69  
    70  // Disjunction: '||' ','
    71  // Conjunction: '&&' '+'
    72  // Parens: '(' ')'
    73  
    74  // URL:
    75  var lexerURLCharsRxx = `([^ \n\t&|()\[\],+#]+)` // anything but control chars
    76  var lexerURLSquareRxx = `(\[[^ \n\t#\[\]]*\])`  // square bracket syntax, allows pretty much anything in
    77  // URL has a character group and optionally square bracket groups
    78  var lexerURLRxx = `((` + lexerURLCharsRxx + lexerURLSquareRxx + `?)|(` + lexerURLSquareRxx + `?` + lexerURLCharsRxx + `))`
    79  
    80  var lexerItemRxx = regexp.MustCompile(`^((\|\|)|(\,)|(\&\&)|(\+)|(\()|(\))|` + lexerURLRxx + `)`)
    81  var lexerWhitespaceRxx = regexp.MustCompile(`^([\n\t ]+)`)
    82  
    83  func NewLexer(s string) *Lexer {
    84  	l := &Lexer{buffer: []byte(s)}
    85  	l.stripBuffer()
    86  	return l
    87  }
    88  
    89  // strip whitespace off the front
    90  func (lx *Lexer) stripBuffer() {
    91  	if len(lx.buffer) > 0 {
    92  		if match := lexerWhitespaceRxx.FindSubmatchIndex(lx.buffer); match != nil {
    93  			lx.buffer = lx.buffer[match[3]:]
    94  		}
    95  	}
    96  }
    97  
    98  func (lx *Lexer) advanceBuffer(i int) {
    99  	lx.buffer = lx.buffer[i:]
   100  	lx.stripBuffer()
   101  }
   102  
   103  func (lx *Lexer) Putback() {
   104  	lx.putback = true
   105  }
   106  
   107  func (lx *Lexer) Get() Token {
   108  	var ret Token
   109  	if lx.putback {
   110  		ret = lx.last
   111  		lx.putback = false
   112  	} else if len(lx.buffer) == 0 {
   113  		ret = NewToken(EOF)
   114  	} else if match := lexerItemRxx.FindSubmatchIndex(lx.buffer); match != nil {
   115  		// First 2 in seq are NONE: one for the full expr, another for the
   116  		// outer ^() group.
   117  
   118  		// NOTE: There are a lot more groups due to `lexerURLRxx` inclusion in
   119  		// `lexerItemRxx`, but they happen at the end and we ignore them. We
   120  		// only capture the "outer" group which is URL here. To keep things simple,
   121  		// make sure URL is the last group checked here.
   122  
   123  		seq := []int{NONE, NONE, OR, OR, AND, AND, LPAREN, RPAREN, URL}
   124  		for i := 2; i <= len(seq); i++ {
   125  			if match[i*2] >= 0 {
   126  				ret = Token{Typ: seq[i], value: lx.buffer[match[2*i]:match[2*i+1]]}
   127  				lx.advanceBuffer(match[2*i+1])
   128  				break
   129  			}
   130  		}
   131  	} else {
   132  		ret = Token{Typ: ERROR, value: lx.buffer}
   133  		lx.buffer = nil
   134  	}
   135  	lx.last = ret
   136  	return ret
   137  }
   138  
   139  type Parser struct {
   140  	lexer   *Lexer
   141  	err     error
   142  	andOnly bool
   143  }
   144  
   145  func NewParser(lexer *Lexer) *Parser {
   146  	ret := &Parser{lexer, nil, false}
   147  	return ret
   148  }
   149  
   150  func NewAssertionAnd(left, right AssertionExpression) AssertionAnd {
   151  	factors := []AssertionExpression{left, right}
   152  	return AssertionAnd{factors}
   153  }
   154  
   155  func NewAssertionOr(left, right AssertionExpression, symbol string) AssertionOr {
   156  	terms := []AssertionExpression{left, right}
   157  	return AssertionOr{
   158  		terms:  terms,
   159  		symbol: symbol,
   160  	}
   161  }
   162  
   163  func NewAssertionKeybaseUsername(username string) AssertionKeybase {
   164  	return AssertionKeybase{AssertionURLBase: AssertionURLBase{Key: "keybase", Value: username}}
   165  }
   166  
   167  func (p *Parser) Parse(ctx AssertionContext) AssertionExpression {
   168  	ret := p.parseExpr(ctx)
   169  	if ret != nil {
   170  		tok := p.lexer.Get()
   171  		switch tok.Typ {
   172  		case EOF:
   173  			// expected
   174  		case ERROR:
   175  			p.err = NewAssertionParseError("Found error at end of input (%s)",
   176  				tok.value)
   177  			ret = nil
   178  		default:
   179  			p.err = NewAssertionParseError("Found junk at end of input: %s",
   180  				tok.value)
   181  			ret = nil
   182  		}
   183  	}
   184  	return ret
   185  }
   186  
   187  func (p *Parser) parseTerm(ctx AssertionContext) (ret AssertionExpression) {
   188  	factor := p.parseFactor(ctx)
   189  	tok := p.lexer.Get()
   190  	if tok.Typ == AND {
   191  		term := p.parseTerm(ctx)
   192  		ret = NewAssertionAnd(factor, term)
   193  	} else {
   194  		ret = factor
   195  		p.lexer.Putback()
   196  	}
   197  	return ret
   198  }
   199  
   200  func (p *Parser) parseFactor(ctx AssertionContext) (ret AssertionExpression) {
   201  	tok := p.lexer.Get()
   202  	switch tok.Typ {
   203  	case URL:
   204  		url, err := ParseAssertionURL(ctx, tok.getString(), false)
   205  		if err != nil {
   206  			p.err = err
   207  		} else {
   208  			ret = url
   209  		}
   210  	case LPAREN:
   211  		if ex := p.parseExpr(ctx); ex == nil {
   212  			ret = nil
   213  			p.err = NewAssertionParseError("Illegal parenthetical expression")
   214  		} else {
   215  			tok = p.lexer.Get()
   216  			if tok.Typ == RPAREN {
   217  				ret = ex
   218  			} else {
   219  				ret = nil
   220  				p.err = NewAssertionParseError("Unbalanced parentheses")
   221  			}
   222  		}
   223  	default:
   224  		p.err = tok.unexpectedError()
   225  	}
   226  	return ret
   227  }
   228  
   229  func (p *Parser) parseExpr(ctx AssertionContext) (ret AssertionExpression) {
   230  	term := p.parseTerm(ctx)
   231  	tok := p.lexer.Get()
   232  	if tok.Typ != OR {
   233  		ret = term
   234  		p.lexer.Putback()
   235  	} else if p.andOnly {
   236  		p.err = NewAssertionParseErrorWithReason(
   237  			AssertionParseErrorReasonUnexpectedOR,
   238  			"Unexpected 'OR' operator (no '||'s or ','s allowed in this context)",
   239  		)
   240  	} else {
   241  		ex := p.parseExpr(ctx)
   242  		ret = NewAssertionOr(term, ex, string(tok.value))
   243  	}
   244  	return ret
   245  }
   246  
   247  func AssertionParse(ctx AssertionContext, s string) (AssertionExpression, error) {
   248  	lexer := NewLexer(s)
   249  	parser := Parser{
   250  		lexer:   lexer,
   251  		err:     nil,
   252  		andOnly: false,
   253  	}
   254  	ret := parser.Parse(ctx)
   255  	return ret, parser.err
   256  }
   257  
   258  func AssertionParseAndOnly(ctx AssertionContext, s string) (AssertionExpression, error) {
   259  	lexer := NewLexer(s)
   260  	parser := Parser{
   261  		lexer:   lexer,
   262  		err:     nil,
   263  		andOnly: true,
   264  	}
   265  	ret := parser.Parse(ctx)
   266  	return ret, parser.err
   267  }
   268  
   269  // Parse an assertion list like "alice,bob&&bob@twitter#char"
   270  // OR nodes are not allowed (asides from the commas)
   271  func ParseAssertionsWithReaders(ctx AssertionContext, assertions string) (writers, readers []AssertionExpression, err error) {
   272  	if len(assertions) == 0 {
   273  		return writers, readers, fmt.Errorf("empty assertion")
   274  	}
   275  
   276  	split := strings.Split(assertions, "#")
   277  	if len(split) > 2 {
   278  		return writers, readers, fmt.Errorf("too many reader divisions ('#') in assertions: %v", assertions)
   279  	}
   280  
   281  	writers, err = ParseAssertionList(ctx, split[0])
   282  	if err != nil {
   283  		return writers, readers, err
   284  	}
   285  
   286  	if len(split) >= 2 && len(split[1]) > 0 {
   287  		readers, err = ParseAssertionList(ctx, split[1])
   288  		if err != nil {
   289  			return writers, readers, err
   290  		}
   291  	}
   292  	return writers, readers, nil
   293  }
   294  
   295  // Parse a string into one or more assertions. Only AND assertions are allowed within each part.
   296  // like "alice,bob&&bob@twitter"
   297  func ParseAssertionList(ctx AssertionContext, assertionsStr string) (res []AssertionExpression, err error) {
   298  	expr, err := AssertionParse(ctx, assertionsStr)
   299  	if err != nil {
   300  		return res, err
   301  	}
   302  	return unpackAssertionList(expr)
   303  }
   304  
   305  // Unpack an assertion with one or more comma-separated parts. Only AND assertions are allowed within each part.
   306  func unpackAssertionList(expr AssertionExpression) (res []AssertionExpression, err error) {
   307  	switch expr := expr.(type) {
   308  	case AssertionOr:
   309  		// List (or recursive tree) of comma-separated items.
   310  
   311  		if expr.symbol != "," {
   312  			// Don't allow "||". That would be confusing.
   313  			return res, fmt.Errorf("disallowed OR expression: '%v'", expr.symbol)
   314  		}
   315  		for _, sub := range expr.terms {
   316  			// Recurse because "a,b,c" could look like (OR a (OR b c))
   317  			sublist, err := unpackAssertionList(sub)
   318  			if err != nil {
   319  				return res, err
   320  			}
   321  			res = append(res, sublist...)
   322  		}
   323  		return res, nil
   324  	default:
   325  		// Just one item.
   326  		err = checkAssertionListItem(expr)
   327  		return []AssertionExpression{expr}, err
   328  	}
   329  }
   330  
   331  // A single item in a comma-separated assertion list must not have any ORs in its subtree.
   332  func checkAssertionListItem(expr AssertionExpression) error {
   333  	if expr.HasOr() {
   334  		return fmt.Errorf("assertions with OR are not allowed here")
   335  	}
   336  	switch expr.(type) {
   337  	case AssertionOr:
   338  		// this should never happen
   339  		return fmt.Errorf("assertion parse fault: unexpected OR")
   340  	default:
   341  		// Anything else is allowed.
   342  		return nil
   343  	}
   344  }