github.com/authzed/spicedb@v1.32.1-0.20240520085336-ebda56537386/pkg/schemadsl/lexer/lex.go (about) 1 // Based on design first introduced in: http://blog.golang.org/two-go-talks-lexical-scanning-in-go-and 2 // Portions copied and modified from: https://github.com/golang/go/blob/master/src/text/template/parse/lex.go 3 4 package lexer 5 6 import ( 7 "fmt" 8 "strings" 9 "sync" 10 "unicode/utf8" 11 12 "github.com/authzed/spicedb/pkg/schemadsl/input" 13 ) 14 15 const EOFRUNE = -1 16 17 // createLexer creates a new scanner for the input string. 18 func createLexer(source input.Source, input string) *Lexer { 19 l := &Lexer{ 20 source: source, 21 input: input, 22 tokens: make(chan Lexeme), 23 closed: make(chan struct{}), 24 } 25 go l.run() 26 return l 27 } 28 29 // run runs the state machine for the lexer. 30 func (l *Lexer) run() { 31 defer func() { 32 close(l.tokens) 33 }() 34 l.withLock(func() { 35 l.state = lexSource 36 }) 37 var state stateFn 38 for { 39 l.withRLock(func() { 40 state = l.state 41 }) 42 if state == nil { 43 break 44 } 45 next := state(l) 46 l.withLock(func() { 47 l.state = next 48 }) 49 } 50 } 51 52 // Close stops the lexer from running. 53 func (l *Lexer) Close() { 54 close(l.closed) 55 l.withLock(func() { 56 l.state = nil 57 }) 58 } 59 60 // withLock runs f protected by l's lock 61 func (l *Lexer) withLock(f func()) { 62 l.Lock() 63 defer l.Unlock() 64 f() 65 } 66 67 // withRLock runs f protected by l's read lock 68 func (l *Lexer) withRLock(f func()) { 69 l.RLock() 70 defer l.RUnlock() 71 f() 72 } 73 74 // Lexeme represents a token returned from scanning the contents of a file. 75 type Lexeme struct { 76 Kind TokenType // The type of this lexeme. 77 Position input.BytePosition // The starting position of this token in the input string. 78 Value string // The textual value of this token. 79 Error string // The error associated with the lexeme, if any. 80 } 81 82 // stateFn represents the state of the scanner as a function that returns the next state. 83 type stateFn func(*Lexer) stateFn 84 85 // Lexer holds the state of the scanner. 86 type Lexer struct { 87 sync.RWMutex 88 source input.Source // the name of the input; used only for error reports 89 input string // the string being scanned 90 state stateFn // the next lexing function to enter 91 pos input.BytePosition // current position in the input 92 start input.BytePosition // start position of this token 93 width input.BytePosition // width of last rune read from input 94 lastPos input.BytePosition // position of most recent token returned by nextToken 95 tokens chan Lexeme // channel of scanned lexemes 96 currentToken Lexeme // The current token if any 97 lastNonIgnoredToken Lexeme // The last token returned that is non-whitespace and non-comment 98 closed chan struct{} // Holds the closed channel 99 } 100 101 // nextToken returns the next token from the input. 102 func (l *Lexer) nextToken() Lexeme { 103 token := <-l.tokens 104 l.lastPos = token.Position 105 return token 106 } 107 108 // next returns the next rune in the input. 109 func (l *Lexer) next() rune { 110 if int(l.pos) >= len(l.input) { 111 l.width = 0 112 return EOFRUNE 113 } 114 r, w := utf8.DecodeRuneInString(l.input[l.pos:]) 115 l.width = input.BytePosition(w) 116 l.pos += l.width 117 return r 118 } 119 120 // peek returns but does not consume the next rune in the input. 121 func (l *Lexer) peek() rune { 122 r := l.next() 123 l.backup() 124 return r 125 } 126 127 // backup steps back one rune. Can only be called once per call of next. 128 func (l *Lexer) backup() { 129 l.pos -= l.width 130 } 131 132 // value returns the current value of the token in the lexer. 133 func (l *Lexer) value() string { 134 return l.input[l.start:l.pos] 135 } 136 137 // emit passes an token back to the client. 138 func (l *Lexer) emit(t TokenType) { 139 currentToken := Lexeme{t, l.start, l.value(), ""} 140 141 if t != TokenTypeWhitespace && t != TokenTypeMultilineComment && t != TokenTypeSinglelineComment { 142 l.lastNonIgnoredToken = currentToken 143 } 144 145 select { 146 case l.tokens <- currentToken: 147 l.currentToken = currentToken 148 l.start = l.pos 149 150 case <-l.closed: 151 return 152 } 153 } 154 155 // errorf returns an error token and terminates the scan by passing 156 // back a nil pointer that will be the next state, terminating l.nexttoken. 157 func (l *Lexer) errorf(currentRune rune, format string, args ...interface{}) stateFn { 158 l.tokens <- Lexeme{TokenTypeError, l.start, string(currentRune), fmt.Sprintf(format, args...)} 159 return nil 160 } 161 162 // peekValue looks forward for the given value string. If found, returns true. 163 func (l *Lexer) peekValue(value string) bool { 164 for index, runeValue := range value { 165 r := l.next() 166 if r != runeValue { 167 for j := 0; j <= index; j++ { 168 l.backup() 169 } 170 return false 171 } 172 } 173 174 for i := 0; i < len(value); i++ { 175 l.backup() 176 } 177 178 return true 179 } 180 181 // accept consumes the next rune if it's from the valid set. 182 func (l *Lexer) accept(valid string) bool { 183 if nextRune := l.next(); strings.ContainsRune(valid, nextRune) { 184 return true 185 } 186 l.backup() 187 return false 188 } 189 190 // acceptString consumes the full given string, if the next tokens in the stream. 191 func (l *Lexer) acceptString(value string) bool { 192 for index, runeValue := range value { 193 if l.next() != runeValue { 194 for i := 0; i <= index; i++ { 195 l.backup() 196 } 197 198 return false 199 } 200 } 201 202 return true 203 } 204 205 // lexSource scans until EOFRUNE 206 func lexSource(l *Lexer) stateFn { 207 return lexerEntrypoint(l) 208 } 209 210 // checkFn returns whether a rune matches for continue looping. 211 type checkFn func(r rune) (bool, error) 212 213 func buildLexUntil(findType TokenType, checker checkFn) stateFn { 214 return func(l *Lexer) stateFn { 215 for { 216 r := l.next() 217 isValid, err := checker(r) 218 if err != nil { 219 return l.errorf(r, "%v", err) 220 } 221 if !isValid { 222 l.backup() 223 break 224 } 225 } 226 227 l.emit(findType) 228 return lexSource 229 } 230 }