github.com/authzed/spicedb@v1.32.1-0.20240520085336-ebda56537386/pkg/schemadsl/lexer/lex_def.go (about)

     1  //go:generate go run golang.org/x/tools/cmd/stringer -type=TokenType
     2  
     3  package lexer
     4  
     5  import (
     6  	"unicode"
     7  
     8  	"github.com/authzed/spicedb/pkg/schemadsl/input"
     9  )
    10  
// Lex creates a new scanner for the input string.
//
// NOTE(review): the `input` string parameter shadows the imported `input`
// package inside the body; that is safe here because the body only forwards
// its arguments and never references the package.
func Lex(source input.Source, input string) *Lexer {
	return createLexer(source, input)
}
    15  
// TokenType identifies the type of lexer lexemes.
type TokenType int

const (
	TokenTypeError TokenType = iota // error occurred; value is text of error

	// Synthetic semicolon: emitted in place of a newline when the previous
	// meaningful token appears in syntheticPredecessors, allowing statements
	// to be terminated by line breaks without an explicit `;`.
	TokenTypeSyntheticSemicolon

	TokenTypeEOF
	TokenTypeWhitespace
	TokenTypeSinglelineComment
	TokenTypeMultilineComment
	TokenTypeNewline

	TokenTypeKeyword    // interface
	TokenTypeIdentifier // helloworld
	TokenTypeNumber     // 123

	TokenTypeLeftBrace  // {
	TokenTypeRightBrace // }
	TokenTypeLeftParen  // (
	TokenTypeRightParen // )

	TokenTypePipe  // |
	TokenTypePlus  // +
	TokenTypeMinus // -
	TokenTypeAnd   // &
	TokenTypeDiv   // /

	TokenTypeEquals     // =
	TokenTypeColon      // :
	TokenTypeSemicolon  // ;
	TokenTypeRightArrow // ->
	TokenTypeHash       // #
	TokenTypeEllipsis   // ...
	TokenTypeStar       // *

	// Additional tokens for CEL: https://github.com/google/cel-spec/blob/master/doc/langdef.md#syntax
	TokenTypeQuestionMark       // ?
	TokenTypeConditionalOr      // ||
	TokenTypeConditionalAnd     // &&
	TokenTypeExclamationPoint   // !
	TokenTypeLeftBracket        // [
	TokenTypeRightBracket       // ]
	TokenTypePeriod             // .
	TokenTypeComma              // ,
	TokenTypePercent            // %
	TokenTypeLessThan           // <
	TokenTypeGreaterThan        // >
	TokenTypeLessThanOrEqual    // <=
	TokenTypeGreaterThanOrEqual // >=
	TokenTypeEqualEqual         // ==
	TokenTypeNotEqual           // !=
	TokenTypeString             // "...", '...', """...""", '''...'''
)
    72  
// keywords contains the full set of keywords supported.
// lexIdentifierOrKeyword consults this set to decide between emitting
// TokenTypeKeyword and TokenTypeIdentifier.
var keywords = map[string]struct{}{
	"definition": {},
	"caveat":     {},
	"relation":   {},
	"permission": {},
	"nil":        {},
	"with":       {},
}
    82  
    83  // IsKeyword returns whether the specified input string is a reserved keyword.
    84  func IsKeyword(candidate string) bool {
    85  	_, ok := keywords[candidate]
    86  	return ok
    87  }
    88  
// syntheticPredecessors contains the full set of token types after which, if a newline is found,
// we emit a synthetic semicolon rather than a normal newline token.
// These are the token types that can validly end a statement.
var syntheticPredecessors = map[TokenType]bool{
	TokenTypeIdentifier: true,
	TokenTypeKeyword:    true,

	TokenTypeRightBrace: true,
	TokenTypeRightParen: true,

	TokenTypeStar: true,
}
   100  
// lexerEntrypoint scans until EOFRUNE, emitting one token per iteration.
// Multi-character operators (||, &&, !=, <=, >=, ==, ->, ...) are
// disambiguated from their single-character prefixes by attempting to
// accept the longer form first. Identifiers, string literals and comments
// are delegated to dedicated state functions.
func lexerEntrypoint(l *Lexer) stateFn {
Loop:
	for {
		switch r := l.next(); {
		case r == EOFRUNE:
			break Loop

		case r == '{':
			l.emit(TokenTypeLeftBrace)

		case r == '}':
			l.emit(TokenTypeRightBrace)

		case r == '(':
			l.emit(TokenTypeLeftParen)

		case r == ')':
			l.emit(TokenTypeRightParen)

		case r == '+':
			l.emit(TokenTypePlus)

		case r == '|':
			// `||` (conditional-or) vs `|` (union).
			if l.acceptString("|") {
				l.emit(TokenTypeConditionalOr)
			} else {
				l.emit(TokenTypePipe)
			}

		case r == '&':
			// `&&` (conditional-and) vs `&` (intersection).
			if l.acceptString("&") {
				l.emit(TokenTypeConditionalAnd)
			} else {
				l.emit(TokenTypeAnd)
			}

		case r == '?':
			l.emit(TokenTypeQuestionMark)

		case r == '!':
			if l.acceptString("=") {
				l.emit(TokenTypeNotEqual)
			} else {
				l.emit(TokenTypeExclamationPoint)
			}

		case r == '[':
			l.emit(TokenTypeLeftBracket)

		case r == ']':
			l.emit(TokenTypeRightBracket)

		case r == '%':
			l.emit(TokenTypePercent)

		case r == '<':
			if l.acceptString("=") {
				l.emit(TokenTypeLessThanOrEqual)
			} else {
				l.emit(TokenTypeLessThan)
			}

		case r == '>':
			if l.acceptString("=") {
				l.emit(TokenTypeGreaterThanOrEqual)
			} else {
				l.emit(TokenTypeGreaterThan)
			}

		case r == ',':
			l.emit(TokenTypeComma)

		case r == '=':
			if l.acceptString("=") {
				l.emit(TokenTypeEqualEqual)
			} else {
				l.emit(TokenTypeEquals)
			}

		case r == ':':
			l.emit(TokenTypeColon)

		case r == ';':
			l.emit(TokenTypeSemicolon)

		case r == '#':
			l.emit(TokenTypeHash)

		case r == '*':
			l.emit(TokenTypeStar)

		case r == '.':
			// `...` (ellipsis) vs `.` (member access); one '.' is already consumed.
			if l.acceptString("..") {
				l.emit(TokenTypeEllipsis)
			} else {
				l.emit(TokenTypePeriod)
			}

		case r == '-':
			if l.accept(">") {
				l.emit(TokenTypeRightArrow)
			} else {
				l.emit(TokenTypeMinus)
			}

		case isSpace(r):
			l.emit(TokenTypeWhitespace)

		case isNewline(r):
			// If the previous token matches the synthetic semicolon list,
			// we emit a synthetic semicolon instead of a simple newline.
			if _, ok := syntheticPredecessors[l.lastNonIgnoredToken.Kind]; ok {
				l.emit(TokenTypeSyntheticSemicolon)
			} else {
				l.emit(TokenTypeNewline)
			}

		case isAlphaNumeric(r):
			// Back up so the identifier state sees the full lexeme from its start.
			l.backup()
			return lexIdentifierOrKeyword

		case r == '\'' || r == '"':
			l.backup()
			return lexStringLiteral

		case r == '/':
			// Check for comments.
			if l.peekValue("/") {
				l.backup()
				return lexSinglelineComment
			}

			if l.peekValue("*") {
				l.backup()
				return lexMultilineComment
			}

			l.emit(TokenTypeDiv)
		default:
			return l.errorf(r, "unrecognized character at this location: %#U", r)
		}
	}

	l.emit(TokenTypeEOF)
	return nil
}
   248  
   249  // lexStringLiteral scan until the close of the string literal or EOFRUNE
   250  func lexStringLiteral(l *Lexer) stateFn {
   251  	allowNewlines := false
   252  	terminator := ""
   253  
   254  	if l.acceptString(`"""`) {
   255  		terminator = `"""`
   256  		allowNewlines = true
   257  	} else if l.acceptString(`'''`) {
   258  		terminator = `"""`
   259  		allowNewlines = true
   260  	} else if l.acceptString(`"`) {
   261  		terminator = `"`
   262  	} else if l.acceptString(`'`) {
   263  		terminator = `'`
   264  	}
   265  
   266  	for {
   267  		if l.peekValue(terminator) {
   268  			l.acceptString(terminator)
   269  			l.emit(TokenTypeString)
   270  			return lexSource
   271  		}
   272  
   273  		// Otherwise, consume until we hit EOFRUNE.
   274  		r := l.next()
   275  		if !allowNewlines && isNewline(r) {
   276  			return l.errorf(r, "Unterminated string")
   277  		}
   278  
   279  		if r == EOFRUNE {
   280  			return l.errorf(r, "Unterminated string")
   281  		}
   282  	}
   283  }
   284  
   285  // lexSinglelineComment scans until newline or EOFRUNE
   286  func lexSinglelineComment(l *Lexer) stateFn {
   287  	checker := func(r rune) (bool, error) {
   288  		result := r == EOFRUNE || isNewline(r)
   289  		return !result, nil
   290  	}
   291  
   292  	l.acceptString("//")
   293  	return buildLexUntil(TokenTypeSinglelineComment, checker)
   294  }
   295  
   296  // lexMultilineComment scans until the close of the multiline comment or EOFRUNE
   297  func lexMultilineComment(l *Lexer) stateFn {
   298  	l.acceptString("/*")
   299  	for {
   300  		// Check for the end of the multiline comment.
   301  		if l.peekValue("*/") {
   302  			l.acceptString("*/")
   303  			l.emit(TokenTypeMultilineComment)
   304  			return lexSource
   305  		}
   306  
   307  		// Otherwise, consume until we hit EOFRUNE.
   308  		r := l.next()
   309  		if r == EOFRUNE {
   310  			return l.errorf(r, "Unterminated multiline comment")
   311  		}
   312  	}
   313  }
   314  
   315  // lexIdentifierOrKeyword searches for a keyword or literal identifier.
   316  func lexIdentifierOrKeyword(l *Lexer) stateFn {
   317  	for {
   318  		if !isAlphaNumeric(l.peek()) {
   319  			break
   320  		}
   321  
   322  		l.next()
   323  	}
   324  
   325  	_, isKeyword := keywords[l.value()]
   326  
   327  	switch {
   328  	case isKeyword:
   329  		l.emit(TokenTypeKeyword)
   330  
   331  	default:
   332  		l.emit(TokenTypeIdentifier)
   333  	}
   334  
   335  	return lexSource
   336  }
   337  
   338  // isSpace reports whether r is a space character.
   339  func isSpace(r rune) bool {
   340  	return r == ' ' || r == '\t'
   341  }
   342  
   343  // isNewline reports whether r is a newline character.
   344  func isNewline(r rune) bool {
   345  	return r == '\r' || r == '\n'
   346  }
   347  
   348  // isAlphaNumeric reports whether r is an alphabetic, digit, or underscore.
   349  func isAlphaNumeric(r rune) bool {
   350  	return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r)
   351  }