github.com/jlmucb/cloudproxy@v0.0.0-20170830161738-b5aa0b619bc4/go/tao/auth/lexer.go

// Copyright (c) 2014, Kevin Walsh. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// This code borrows from the lexer design and implementation described
// by Rob Pike, "Lexical Scanning in Go", GTUG Sydney, Aug 30, 2011.
// See: http://cuddle.googlecode.com/hg/talk/lex.html#slide-40
//
// It also borrows from the lexer in package
// github.com/kevinawalsh/datalog/dlengine.

package auth

import (
	"bytes"
	"encoding/base64"
	"encoding/hex"
	"fmt"
	"io"
	"unicode"
	"unicode/utf8"
)

// token is a value returned from the lexer.
type token struct {
	typ itemType
	val interface{} // string, int64, error, or nil
}

// itemType identifies the type of lex items.
type itemType int

const (
	itemError          itemType = iota // value contains error
	itemUnexpectedRune                 // value contains the rune
	itemEOF                            // value is nil
	itemKeyword                        // value contains the keyword
	itemIdentifier                     // value contains the identifier
	itemStr                            // value contains the string
	itemBytes                          // value contains the []byte slice
	itemInt                            // value contains the int64
	itemLP                             // value contains '('
	itemRP                             // value contains ')'
	itemComma                          // value contains ','
	itemDot                            // value contains '.'
	itemColon                          // value contains ':'
	itemWhitespace                     // value contains ' ', '\t', '\n', etc.
)

var (
	tokenFrom      = token{itemKeyword, "from"}
	tokenUntil     = token{itemKeyword, "until"}
	tokenSays      = token{itemKeyword, "says"}
	tokenSpeaksfor = token{itemKeyword, "speaksfor"}
	tokenForall    = token{itemKeyword, "forall"}
	tokenExists    = token{itemKeyword, "exists"}
	tokenImplies   = token{itemKeyword, "implies"}
	tokenOr        = token{itemKeyword, "or"}
	tokenAnd       = token{itemKeyword, "and"}
	tokenNot       = token{itemKeyword, "not"}
	tokenFalse     = token{itemKeyword, "false"}
	tokenTrue      = token{itemKeyword, "true"}
	tokenExt       = token{itemKeyword, "ext"}
	tokenLP        = token{itemLP, '('}
	tokenRP        = token{itemRP, ')'}
	tokenComma     = token{itemComma, ','}
	tokenDot       = token{itemDot, '.'}
	tokenColon     = token{itemColon, ':'}
	tokenEOF       = token{itemEOF, nil}
)

var reservedKeywordTokens = map[token]bool{
	tokenFrom:      true,
	tokenUntil:     true,
	tokenSays:      true,
	tokenSpeaksfor: true,
	tokenForall:    true,
	tokenExists:    true,
	tokenImplies:   true,
	tokenOr:        true,
	tokenAnd:       true,
	tokenNot:       true,
	tokenFalse:     true,
	tokenTrue:      true,
	tokenExt:       true,
}

// isPrinToken reports whether the input is a principal token. A principal
// token is a keyword that begins with a lowercase letter and is not in the
// set of reserved keywords.
func isPrinToken(i token) bool {
	_, ok := reservedKeywordTokens[i]
	return !ok && i.typ == itemKeyword && lower(rune(i.val.(string)[0]))
}
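// Illustrative examples of the classification above (these calls are a
// sketch, not part of the original file):
//
//	isPrinToken(token{itemKeyword, "key"})     // true: lowercase keyword, not reserved
//	isPrinToken(tokenSays)                     // false: "says" is a reserved keyword
//	isPrinToken(token{itemIdentifier, "Pred"}) // false: not an itemKeyword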
// String returns a pretty-printed token, e.g. for debugging.
func (i token) String() string {
	switch i.typ {
	case itemError:
		return fmt.Sprintf("Error{%v}", i.val)
	case itemUnexpectedRune:
		return fmt.Sprintf("UnexpectedRune{%v}", i.val)
	case itemEOF:
		return "EOF{}"
	case itemKeyword:
		return fmt.Sprintf("Keyword{%q}", i.val)
	case itemIdentifier:
		return fmt.Sprintf("Identifier{%q}", i.val)
	case itemStr:
		return fmt.Sprintf("Str{%q}", i.val)
	case itemBytes:
		return fmt.Sprintf("Bytes{%02x}", i.val)
	case itemInt:
		return fmt.Sprintf("Int{%v}", i.val)
	case itemLP, itemRP, itemComma, itemDot, itemColon:
		return fmt.Sprintf("Punct{%q}", i.val)
	default:
		panic("not reached")
	}
}

// reader provides input to the scanner.
type reader interface {
	io.RuneScanner // for ReadRune, UnreadRune
	io.Reader      // for Fscanf
}

// lexer holds the state of the scanner.
type lexer struct {
	input reader       // the input being scanned.
	val   bytes.Buffer // accumulated runes returned from next().
	width int          // width of last rune returned from next().
	done  *token       // token found at end of input.
}

const eof rune = 0

// lexMain scans the input for the next token, dispatching on its first rune
// and discarding leading whitespace.
func (l *lexer) lexMain() token {
	for {
		switch r := l.next(); {
		case r == eof:
			return tokenEOF
		case unicode.IsSpace(r):
			l.reset()
		case r == '(':
			return tokenLP
		case r == ')':
			return tokenRP
		case r == ',':
			return tokenComma
		case r == '.':
			return tokenDot
		case r == ':':
			return tokenColon
		case r == '"':
			l.backup()
			return l.lexStr()
		case r == '[' || r == '{':
			l.backup()
			return l.lexBytes()
		case r == '-' || digit(r):
			l.backup()
			return l.lexInt()
		case lower(r):
			l.backup()
			return l.lexKeyword()
		case upper(r):
			l.backup()
			return l.lexIdentifier()
		default:
			l.backup()
			return token{itemUnexpectedRune, r}
		}
	}
}

// lexStr scans a double-quoted Go string literal using fmt's %q verb.
func (l *lexer) lexStr() token {
	var s string
	if _, err := fmt.Fscanf(l.input, "%q", &s); err != nil {
		return token{itemError, err}
	}
	return token{itemStr, s}
}
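// lexBytes scans a bytes literal in one of two forms: a hex literal such as
// [70fab1 7061], where whitespace may separate even-length groups of hex
// digits, or a URL-safe base64 literal such as {AAEC_w==}. (The literals
// shown here are illustrative examples, not taken from the original file.)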
func (l *lexer) lexBytes() token {
	r := l.next()
	if r == '[' {
		var b []byte
		s := ""
		for {
			r = l.next()
			switch {
			case hexChar(r):
				s += string(r)
			case unicode.IsSpace(r) || r == ']':
				x, err := hex.DecodeString(s)
				if err != nil {
					return token{itemError, err}
				}
				b = append(b, x...)
				s = "" // reset so already-decoded digits are not decoded again
				if r == ']' {
					return token{itemBytes, b}
				}
			default:
				return token{itemError, fmt.Errorf("expected bytes, found %q", s)}
			}
		}
	} else if r == '{' {
		s := ""
		for {
			r = l.next()
			switch {
			case lower(r) || upper(r) || digit(r) || r == '_' || r == '-' || r == '=' || r == '\r' || r == '\n':
				s += string(r)
			case r == '}':
				b, err := base64.URLEncoding.DecodeString(s)
				if err != nil {
					return token{itemError, err}
				}
				return token{itemBytes, b}
			default:
				return token{itemError, fmt.Errorf("expected base64w, found %q", s)}
			}
		}
	} else {
		return token{itemError, fmt.Errorf("expected '[' or '{', found %q", r)}
	}
}

// lexInt scans a decimal integer using fmt's %d verb.
func (l *lexer) lexInt() token {
	var i int64
	if _, err := fmt.Fscanf(l.input, "%d", &i); err != nil {
		return token{itemError, err}
	}
	return token{itemInt, i}
}

// lexKeyword scans a run of lowercase letters.
// precondition: l.next() is [a-z]
func (l *lexer) lexKeyword() token {
	for {
		r := l.next()
		if !lower(r) {
			l.backup()
			return token{itemKeyword, l.reset()}
		}
	}
}

// lexIdentifier scans an identifier: an uppercase letter followed by
// letters, digits, and underscores.
// precondition: l.next() is [A-Z]
func (l *lexer) lexIdentifier() token {
	for {
		r := l.next()
		if !(lower(r) || upper(r) || digit(r) || r == '_') {
			l.backup()
			return token{itemIdentifier, l.reset()}
		}
	}
}

// digit reports whether r is an ASCII decimal digit.
func digit(r rune) bool {
	return '0' <= r && r <= '9'
}

// lower reports whether r is an ASCII lowercase letter.
func lower(r rune) bool {
	return 'a' <= r && r <= 'z'
}

// upper reports whether r is an ASCII uppercase letter.
func upper(r rune) bool {
	return 'A' <= r && r <= 'Z'
}

// hexChar reports whether r is an ASCII hex digit.
func hexChar(r rune) bool {
	return ('0' <= r && r <= '9') || ('a' <= r && r <= 'f') || ('A' <= r && r <= 'F')
}

// next returns the next rune in the input.
func (l *lexer) next() (r rune) {
	r, n, err := l.input.ReadRune()
	if err == io.EOF {
		l.width = 0
		return eof
	}
	l.val.WriteRune(r)
	// BUG(kwalsh) fmt.ScanState.ReadRune() returns incorrect length. See issue
	// 8512 here: https://code.google.com/p/go/issues/detail?id=8512
	n = utf8.RuneLen(r)
	l.width = n
	return r
}

// backup steps back one rune. Can be called only once per call of next.
func (l *lexer) backup() {
	if l.width > 0 {
		l.input.UnreadRune()
		l.val.Truncate(l.val.Len() - l.width)
		l.width = 0
	}
}

// reset consumes accumulated input and resets val and width.
func (l *lexer) reset() string {
	s := l.val.String()
	l.val.Reset()
	l.width = 0
	return s
}

// lex creates a new scanner for the input.
func lex(input reader) *lexer {
	return &lexer{input: input}
}

// nextToken returns the next token from the input.
func (l *lexer) nextToken() token {
	if l.done != nil {
		// only happens after itemEOF, itemError, or itemUnexpectedRune
		return *l.done
	}
	t := l.lexMain()
	l.reset()
	if t == tokenEOF || t.typ == itemError || t.typ == itemUnexpectedRune {
		l.done = &t
	}
	return t
}

// peek returns the next rune in the input without advancing the input.
func (l *lexer) peek() rune {
	r := l.next()
	l.backup()
	return r
}
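// Usage sketch (not part of the original file): a *bufio.Reader satisfies
// the reader interface above (io.RuneScanner and io.Reader), so a string can
// be tokenized as follows, stopping on any of the terminal token types.
//
//	l := lex(bufio.NewReader(strings.NewReader(`key([70fa]) says Pred(1, "x")`)))
//	for {
//		t := l.nextToken()
//		fmt.Println(t)
//		if t.typ == itemEOF || t.typ == itemError || t.typ == itemUnexpectedRune {
//			break
//		}
//	}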