//go:generate go run golang.org/x/tools/cmd/stringer -type=TokenType

package lexer

import (
	"unicode"

	"github.com/authzed/spicedb/pkg/schemadsl/input"
)

// Lex creates a new scanner for the input string.
func Lex(source input.Source, input string) *Lexer {
	return createLexer(source, input)
}

// TokenType identifies the type of lexer lexemes.
//
// NOTE: the declaration order below fixes the iota values; do not reorder
// without regenerating the stringer output and checking for persisted uses.
type TokenType int

const (
	TokenTypeError TokenType = iota // error occurred; value is text of error

	// Synthetic semicolon: emitted in place of a newline when the previous
	// token allows statement termination (see syntheticPredecessors).
	TokenTypeSyntheticSemicolon

	TokenTypeEOF
	TokenTypeWhitespace
	TokenTypeSinglelineComment
	TokenTypeMultilineComment
	TokenTypeNewline

	TokenTypeKeyword    // interface
	TokenTypeIdentifier // helloworld
	TokenTypeNumber     // 123

	TokenTypeLeftBrace  // {
	TokenTypeRightBrace // }
	TokenTypeLeftParen  // (
	TokenTypeRightParen // )

	TokenTypePipe  // |
	TokenTypePlus  // +
	TokenTypeMinus // -
	TokenTypeAnd   // &
	TokenTypeDiv   // /

	TokenTypeEquals     // =
	TokenTypeColon      // :
	TokenTypeSemicolon  // ;
	TokenTypeRightArrow // ->
	TokenTypeHash       // #
	TokenTypeEllipsis   // ...
	TokenTypeStar       // *

	// Additional tokens for CEL: https://github.com/google/cel-spec/blob/master/doc/langdef.md#syntax
	TokenTypeQuestionMark       // ?
	TokenTypeConditionalOr      // ||
	TokenTypeConditionalAnd     // &&
	TokenTypeExclamationPoint   // !
	TokenTypeLeftBracket        // [
	TokenTypeRightBracket       // ]
	TokenTypePeriod             // .
	TokenTypeComma              // ,
	TokenTypePercent            // %
	TokenTypeLessThan           // <
	TokenTypeGreaterThan        // >
	TokenTypeLessThanOrEqual    // <=
	TokenTypeGreaterThanOrEqual // >=
	TokenTypeEqualEqual         // ==
	TokenTypeNotEqual           // !=
	TokenTypeString             // "...", '...', """...""", '''...'''
)

// keywords contains the full set of keywords supported.
74 var keywords = map[string]struct{}{ 75 "definition": {}, 76 "caveat": {}, 77 "relation": {}, 78 "permission": {}, 79 "nil": {}, 80 "with": {}, 81 } 82 83 // IsKeyword returns whether the specified input string is a reserved keyword. 84 func IsKeyword(candidate string) bool { 85 _, ok := keywords[candidate] 86 return ok 87 } 88 89 // syntheticPredecessors contains the full set of token types after which, if a newline is found, 90 // we emit a synthetic semicolon rather than a normal newline token. 91 var syntheticPredecessors = map[TokenType]bool{ 92 TokenTypeIdentifier: true, 93 TokenTypeKeyword: true, 94 95 TokenTypeRightBrace: true, 96 TokenTypeRightParen: true, 97 98 TokenTypeStar: true, 99 } 100 101 // lexerEntrypoint scans until EOFRUNE 102 func lexerEntrypoint(l *Lexer) stateFn { 103 Loop: 104 for { 105 switch r := l.next(); { 106 case r == EOFRUNE: 107 break Loop 108 109 case r == '{': 110 l.emit(TokenTypeLeftBrace) 111 112 case r == '}': 113 l.emit(TokenTypeRightBrace) 114 115 case r == '(': 116 l.emit(TokenTypeLeftParen) 117 118 case r == ')': 119 l.emit(TokenTypeRightParen) 120 121 case r == '+': 122 l.emit(TokenTypePlus) 123 124 case r == '|': 125 if l.acceptString("|") { 126 l.emit(TokenTypeConditionalOr) 127 } else { 128 l.emit(TokenTypePipe) 129 } 130 131 case r == '&': 132 if l.acceptString("&") { 133 l.emit(TokenTypeConditionalAnd) 134 } else { 135 l.emit(TokenTypeAnd) 136 } 137 138 case r == '?': 139 l.emit(TokenTypeQuestionMark) 140 141 case r == '!': 142 if l.acceptString("=") { 143 l.emit(TokenTypeNotEqual) 144 } else { 145 l.emit(TokenTypeExclamationPoint) 146 } 147 148 case r == '[': 149 l.emit(TokenTypeLeftBracket) 150 151 case r == ']': 152 l.emit(TokenTypeRightBracket) 153 154 case r == '%': 155 l.emit(TokenTypePercent) 156 157 case r == '<': 158 if l.acceptString("=") { 159 l.emit(TokenTypeLessThanOrEqual) 160 } else { 161 l.emit(TokenTypeLessThan) 162 } 163 164 case r == '>': 165 if l.acceptString("=") { 166 
l.emit(TokenTypeGreaterThanOrEqual) 167 } else { 168 l.emit(TokenTypeGreaterThan) 169 } 170 171 case r == ',': 172 l.emit(TokenTypeComma) 173 174 case r == '=': 175 if l.acceptString("=") { 176 l.emit(TokenTypeEqualEqual) 177 } else { 178 l.emit(TokenTypeEquals) 179 } 180 181 case r == ':': 182 l.emit(TokenTypeColon) 183 184 case r == ';': 185 l.emit(TokenTypeSemicolon) 186 187 case r == '#': 188 l.emit(TokenTypeHash) 189 190 case r == '*': 191 l.emit(TokenTypeStar) 192 193 case r == '.': 194 if l.acceptString("..") { 195 l.emit(TokenTypeEllipsis) 196 } else { 197 l.emit(TokenTypePeriod) 198 } 199 200 case r == '-': 201 if l.accept(">") { 202 l.emit(TokenTypeRightArrow) 203 } else { 204 l.emit(TokenTypeMinus) 205 } 206 207 case isSpace(r): 208 l.emit(TokenTypeWhitespace) 209 210 case isNewline(r): 211 // If the previous token matches the synthetic semicolon list, 212 // we emit a synthetic semicolon instead of a simple newline. 213 if _, ok := syntheticPredecessors[l.lastNonIgnoredToken.Kind]; ok { 214 l.emit(TokenTypeSyntheticSemicolon) 215 } else { 216 l.emit(TokenTypeNewline) 217 } 218 219 case isAlphaNumeric(r): 220 l.backup() 221 return lexIdentifierOrKeyword 222 223 case r == '\'' || r == '"': 224 l.backup() 225 return lexStringLiteral 226 227 case r == '/': 228 // Check for comments. 
229 if l.peekValue("/") { 230 l.backup() 231 return lexSinglelineComment 232 } 233 234 if l.peekValue("*") { 235 l.backup() 236 return lexMultilineComment 237 } 238 239 l.emit(TokenTypeDiv) 240 default: 241 return l.errorf(r, "unrecognized character at this location: %#U", r) 242 } 243 } 244 245 l.emit(TokenTypeEOF) 246 return nil 247 } 248 249 // lexStringLiteral scan until the close of the string literal or EOFRUNE 250 func lexStringLiteral(l *Lexer) stateFn { 251 allowNewlines := false 252 terminator := "" 253 254 if l.acceptString(`"""`) { 255 terminator = `"""` 256 allowNewlines = true 257 } else if l.acceptString(`'''`) { 258 terminator = `"""` 259 allowNewlines = true 260 } else if l.acceptString(`"`) { 261 terminator = `"` 262 } else if l.acceptString(`'`) { 263 terminator = `'` 264 } 265 266 for { 267 if l.peekValue(terminator) { 268 l.acceptString(terminator) 269 l.emit(TokenTypeString) 270 return lexSource 271 } 272 273 // Otherwise, consume until we hit EOFRUNE. 274 r := l.next() 275 if !allowNewlines && isNewline(r) { 276 return l.errorf(r, "Unterminated string") 277 } 278 279 if r == EOFRUNE { 280 return l.errorf(r, "Unterminated string") 281 } 282 } 283 } 284 285 // lexSinglelineComment scans until newline or EOFRUNE 286 func lexSinglelineComment(l *Lexer) stateFn { 287 checker := func(r rune) (bool, error) { 288 result := r == EOFRUNE || isNewline(r) 289 return !result, nil 290 } 291 292 l.acceptString("//") 293 return buildLexUntil(TokenTypeSinglelineComment, checker) 294 } 295 296 // lexMultilineComment scans until the close of the multiline comment or EOFRUNE 297 func lexMultilineComment(l *Lexer) stateFn { 298 l.acceptString("/*") 299 for { 300 // Check for the end of the multiline comment. 301 if l.peekValue("*/") { 302 l.acceptString("*/") 303 l.emit(TokenTypeMultilineComment) 304 return lexSource 305 } 306 307 // Otherwise, consume until we hit EOFRUNE. 
308 r := l.next() 309 if r == EOFRUNE { 310 return l.errorf(r, "Unterminated multiline comment") 311 } 312 } 313 } 314 315 // lexIdentifierOrKeyword searches for a keyword or literal identifier. 316 func lexIdentifierOrKeyword(l *Lexer) stateFn { 317 for { 318 if !isAlphaNumeric(l.peek()) { 319 break 320 } 321 322 l.next() 323 } 324 325 _, isKeyword := keywords[l.value()] 326 327 switch { 328 case isKeyword: 329 l.emit(TokenTypeKeyword) 330 331 default: 332 l.emit(TokenTypeIdentifier) 333 } 334 335 return lexSource 336 } 337 338 // isSpace reports whether r is a space character. 339 func isSpace(r rune) bool { 340 return r == ' ' || r == '\t' 341 } 342 343 // isNewline reports whether r is a newline character. 344 func isNewline(r rune) bool { 345 return r == '\r' || r == '\n' 346 } 347 348 // isAlphaNumeric reports whether r is an alphabetic, digit, or underscore. 349 func isAlphaNumeric(r rune) bool { 350 return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r) 351 }