github.com/emcfarlane/larking@v0.0.0-20220605172417-1704b45ee6c3/lexer.go (about) 1 // Copyright 2021 Edward McFarlane. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package larking 6 7 import ( 8 "fmt" 9 "strings" 10 "unicode" 11 "unicode/utf8" 12 ) 13 14 // ### Path template syntax 15 // 16 // Template = "/" Segments [ Verb ] ; 17 // Segments = Segment { "/" Segment } ; 18 // Segment = "*" | "**" | LITERAL | Variable ; 19 // Variable = "{" FieldPath [ "=" Segments ] "}" ; 20 // FieldPath = IDENT { "." IDENT } ; 21 // Verb = ":" LITERAL ; 22 23 type tokenType int 24 25 const ( 26 tokenError = iota 27 tokenSlash // / 28 tokenStar // * 29 tokenStarStar // ** 30 tokenVariableStart // { 31 tokenVariableEnd // } 32 tokenEqual // = 33 tokenValue // a-z A-Z 0-9 - _ 34 tokenDot // . 35 tokenVerb // : 36 tokenPath // a-z A-Z 0-9 . - _ ~ ! $ & ' ( ) * + , ; = @ 37 tokenEOF 38 ) 39 40 type token struct { 41 typ tokenType 42 val string 43 } 44 45 func (t token) String() string { 46 return fmt.Sprintf("(%d) %s", t.typ, t.val) 47 } 48 49 type tokens []token 50 51 func (toks tokens) String() string { 52 var b strings.Builder 53 for _, tok := range toks { 54 b.WriteString(tok.val) 55 } 56 return b.String() 57 } 58 59 func (toks tokens) index(typ tokenType) int { 60 for i, tok := range toks { 61 if tok.typ == typ { 62 return i 63 } 64 } 65 return -1 66 } 67 68 func (toks tokens) indexAny(set tokenSet) int { 69 for i, tok := range toks { 70 if set.has(tok.typ) { 71 return i 72 } 73 } 74 return -1 75 } 76 77 type lexer struct { 78 input string 79 start int 80 pos int 81 width int 82 83 toks tokens 84 } 85 86 type tokenSet uint64 87 88 func (s tokenSet) has(typ tokenType) bool { return s&(1<<uint64(typ)) != 0 } 89 90 func newTokenSet(typs ...tokenType) (s tokenSet) { 91 for _, typ := range typs { 92 s |= 1 << uint(typ) 93 } 94 return s 95 } 96 97 const eof = -1 98 99 func (l *lexer) next() (r rune) { 100 if l.pos >= len(l.input) { 101 l.width = 0 102 return eof 103 } 104 r, l.width = utf8.DecodeRuneInString(l.input[l.pos:]) 105 l.pos += l.width 106 return r 107 } 108 109 func (l *lexer) current() (r rune) { 110 if l.width == 0 { 111 return 0 112 } else if l.pos > l.width { 113 r, _ = utf8.DecodeRuneInString(l.input[l.pos-l.width:]) 114 } else { 115 r, _ = utf8.DecodeRuneInString(l.input) 116 } 117 return r 118 } 119 120 func (l *lexer) backup() { 121 l.pos -= l.width 122 } 123 124 func (l *lexer) acceptRun(isValid func(r rune) bool) int { 125 var i int 126 for isValid(l.next()) { 127 i++ 128 } 129 l.backup() 130 return i 131 } 132 133 func (l *lexer) emit(typ tokenType) { 134 tok := token{typ: typ, val: l.input[l.start:l.pos]} 135 l.toks = append(l.toks, tok) 136 l.start = l.pos 137 } 138 139 func (l *lexer) errUnexpected() error { 140 l.emit(tokenError) 141 r := l.current() 142 return fmt.Errorf("%v:%v unexpected rune %q", l.pos-l.width, l.pos, r) 143 } 144 func (l *lexer) errShort() error { 145 l.emit(tokenError) 146 r := l.current() 147 return fmt.Errorf("%v:%v short read %q", l.pos-l.width, l.pos, r) 148 } 149 150 func isValue(r rune) bool { 151 return unicode.IsLetter(r) || unicode.IsNumber(r) || r == '_' || r == '-' 152 } 153 154 var isPathRune = func() map[rune]bool { 155 m := make(map[rune]bool) 156 for _, r := range ".-_~!$&'()*+,;=@" { 157 m[r] = true 158 } 159 return m 160 }() 161 162 func isPath(r rune) bool { 163 return isValue(r) || isPathRune[r] 164 } 165 166 func lexValue(l *lexer) error { 167 if i := l.acceptRun(isValue); i == 0 { 168 return l.errShort() 169 } 170 l.emit(tokenValue) 171 return nil 172 } 173 174 func lexFieldPath(l *lexer) error { 175 if err := lexValue(l); err != nil { 176 return err 177 } 178 for { 179 if r := l.next(); r != '.' { 180 l.backup() // unknown 181 return nil 182 } 183 l.emit(tokenDot) 184 if err := lexValue(l); err != nil { 185 return err 186 } 187 } 188 } 189 190 func lexVerb(l *lexer) error { 191 if err := lexValue(l); err != nil { 192 return err 193 } 194 if r := l.next(); r == eof { 195 l.emit(tokenEOF) 196 return nil 197 } 198 return l.errUnexpected() 199 } 200 201 func lexVariable(l *lexer) error { 202 r := l.next() 203 if r != '{' { 204 return l.errUnexpected() 205 } 206 l.emit(tokenVariableStart) 207 if err := lexFieldPath(l); err != nil { 208 return err 209 } 210 211 r = l.next() 212 if r == '=' { 213 l.emit(tokenEqual) 214 215 if err := lexSegments(l); err != nil { 216 return err 217 } 218 r = l.next() 219 } 220 221 if r != '}' { 222 return l.errUnexpected() 223 } 224 l.emit(tokenVariableEnd) 225 return nil 226 } 227 228 func lexSegment(l *lexer) error { 229 r := l.next() 230 switch { 231 case unicode.IsLetter(r): 232 if i := l.acceptRun(isValue); i == 0 { 233 return l.errShort() 234 } 235 l.emit(tokenValue) 236 return nil 237 case r == '*': 238 rn := l.next() 239 if rn == '*' { 240 l.emit(tokenStarStar) 241 return nil 242 } 243 l.backup() 244 l.emit(tokenStar) 245 return nil 246 case r == '{': 247 l.backup() 248 return lexVariable(l) 249 default: 250 return l.errUnexpected() 251 } 252 } 253 254 func lexSegments(l *lexer) error { 255 for { 256 if err := lexSegment(l); err != nil { 257 return err 258 } 259 if r := l.next(); r != '/' { 260 l.backup() // unknown 261 return nil 262 } 263 l.emit(tokenSlash) 264 } 265 } 266 267 func lexTemplate(l *lexer) error { 268 if r := l.next(); r != '/' { 269 return l.errUnexpected() 270 } 271 l.emit(tokenSlash) 272 if err := lexSegments(l); err != nil { 273 return err 274 } 275 276 switch r := l.next(); r { 277 case ':': 278 l.emit(tokenVerb) 279 return lexVerb(l) 280 case eof: 281 l.emit(tokenEOF) 282 return nil 283 default: 284 return l.errUnexpected() 285 } 286 } 287 288 func lexPathSegment(l *lexer) error { 289 if i := l.acceptRun(isPath); i == 0 { 290 return l.errShort() 291 } 292 l.emit(tokenPath) 293 return nil 294 } 295 296 // lexPath emits all tokenSlash, tokenVerb and the rest as tokenPath 297 func lexPath(l *lexer) error { 298 for { 299 switch r := l.next(); r { 300 case '/': 301 l.emit(tokenSlash) 302 if err := lexPathSegment(l); err != nil { 303 return err 304 } 305 case ':': 306 l.emit(tokenVerb) 307 if err := lexPathSegment(l); err != nil { 308 return err 309 } 310 case eof: 311 l.emit(tokenEOF) 312 return nil 313 default: 314 panic(":(") 315 } 316 } 317 }