github.com/jlmucb/cloudproxy@v0.0.0-20170830161738-b5aa0b619bc4/go/tao/auth/lexer.go

// Copyright (c) 2014, Kevin Walsh.  All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// This code borrows from the lexer design and implementation described
// by Rob Pike, "Lexical Scanning in Go", GTUG Sydney, Aug 30, 2011.
// See: http://cuddle.googlecode.com/hg/talk/lex.html#slide-40
//
// It also borrows from the lexer in package
// github.com/kevinawalsh/datalog/dlengine.

package auth

import (
	"bytes"
	"encoding/base64"
	"encoding/hex"
	"fmt"
	"io"
	"unicode"
	"unicode/utf8"
)

// token is a value returned from the lexer.
type token struct {
	typ itemType
	val interface{} // string, int64, error, or nil
}

// itemType identifies the type of lex items.
type itemType int

const (
	itemError          itemType = iota // value contains error
	itemUnexpectedRune                 // value contains the rune
	itemEOF                            // value is nil
	itemKeyword                        // value contains the keyword
	itemIdentifier                     // value contains the identifier
	itemStr                            // value contains the string
	itemBytes                          // value contains the []byte slice
	itemInt                            // value contains the int64
	itemLP                             // value contains '('
	itemRP                             // value contains ')'
	itemComma                          // value contains ','
	itemDot                            // value contains '.'
	itemColon                          // value contains ':'
	itemWhitespace                     // value contains ' ', '\t', '\n', etc.
)

var (
	tokenFrom      = token{itemKeyword, "from"}
	tokenUntil     = token{itemKeyword, "until"}
	tokenSays      = token{itemKeyword, "says"}
	tokenSpeaksfor = token{itemKeyword, "speaksfor"}
	tokenForall    = token{itemKeyword, "forall"}
	tokenExists    = token{itemKeyword, "exists"}
	tokenImplies   = token{itemKeyword, "implies"}
	tokenOr        = token{itemKeyword, "or"}
	tokenAnd       = token{itemKeyword, "and"}
	tokenNot       = token{itemKeyword, "not"}
	tokenFalse     = token{itemKeyword, "false"}
	tokenTrue      = token{itemKeyword, "true"}
	tokenExt       = token{itemKeyword, "ext"}
	tokenLP        = token{itemLP, '('}
	tokenRP        = token{itemRP, ')'}
	tokenComma     = token{itemComma, ','}
	tokenDot       = token{itemDot, '.'}
	tokenColon     = token{itemColon, ':'}
	tokenEOF       = token{itemEOF, nil}
)

var reservedKeywordTokens = map[token]bool{
	tokenFrom:      true,
	tokenUntil:     true,
	tokenSays:      true,
	tokenSpeaksfor: true,
	tokenForall:    true,
	tokenExists:    true,
	tokenImplies:   true,
	tokenOr:        true,
	tokenAnd:       true,
	tokenNot:       true,
	tokenFalse:     true,
	tokenTrue:      true,
	tokenExt:       true,
}

// isPrinToken checks whether the input is a principal token. A principal
// token is a keyword that is not in the set of reserved keywords and that
// begins with a lowercase letter.
func isPrinToken(i token) bool {
	_, ok := reservedKeywordTokens[i]
	if !ok && i.typ == itemKeyword && lower(rune(i.val.(string)[0])) {
		return true
	}
	return false
}

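// The following is an illustrative sketch, not part of the original file:
// it shows which tokens qualify as principal tokens. lexKeyword produces
// lowercase keyword tokens, and only the non-reserved ones name principals.
func exampleIsPrinToken() {
	fmt.Println(isPrinToken(token{itemKeyword, "key"}))    // true: lowercase keyword, not reserved
	fmt.Println(isPrinToken(tokenSays))                    // false: reserved keyword
	fmt.Println(isPrinToken(token{itemIdentifier, "Key"})) // false: not a keyword token
}
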
// String returns a pretty-printed form of the token, e.g. for debugging.
func (i token) String() string {
	switch i.typ {
	case itemError:
		return fmt.Sprintf("Error{%v}", i.val)
	case itemUnexpectedRune:
		return fmt.Sprintf("UnexpectedRune{%v}", i.val)
	case itemEOF:
		return "EOF{}"
	case itemKeyword:
		return fmt.Sprintf("Keyword{%q}", i.val)
	case itemIdentifier:
		return fmt.Sprintf("Identifier{%q}", i.val)
	case itemStr:
		return fmt.Sprintf("Str{%q}", i.val)
	case itemBytes:
		return fmt.Sprintf("Bytes{%02x}", i.val)
	case itemInt:
		return fmt.Sprintf("Int{%v}", i.val)
	case itemLP, itemRP, itemComma, itemDot, itemColon:
		return fmt.Sprintf("Punct{%q}", i.val)
	default:
		panic("not reached")
	}
}

// reader provides input to the scanner.
type reader interface {
	io.RuneScanner // for ReadRune, UnreadRune
	io.Reader      // for Fscanf
}

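// Illustrative compile-time checks (hypothetical, not part of the original
// file): both *strings.Reader and *bufio.Reader satisfy reader, so input
// can come from an in-memory string or a buffered stream. Assumes "bufio"
// and "strings" are added to the imports.
var (
	_ reader = strings.NewReader("")
	_ reader = bufio.NewReader(nil)
)
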
// lexer holds the state of the scanner.
type lexer struct {
	input reader       // the input being scanned.
	val   bytes.Buffer // accumulated runes returned from next().
	width int          // width of last rune returned from next().
	done  *token       // token found at end of input.
}

// eof is the sentinel rune returned by next at end of input.
const eof rune = 0

// lexMain skips whitespace, then scans and returns the next token,
// dispatching on the first rune.
func (l *lexer) lexMain() token {
	for {
		switch r := l.next(); {
		case r == eof:
			return tokenEOF
		case unicode.IsSpace(r):
			l.reset()
		case r == '(':
			return tokenLP
		case r == ')':
			return tokenRP
		case r == ',':
			return tokenComma
		case r == '.':
			return tokenDot
		case r == ':':
			return tokenColon
		case r == '"':
			l.backup()
			return l.lexStr()
		case r == '[' || r == '{':
			l.backup()
			return l.lexBytes()
		case r == '-' || digit(r):
			l.backup()
			return l.lexInt()
		case lower(r):
			l.backup()
			return l.lexKeyword()
		case upper(r):
			l.backup()
			return l.lexIdentifier()
		default:
			l.backup()
			return token{itemUnexpectedRune, r}
		}
	}
}

// lexStr scans a double-quoted Go string literal using fmt's %q verb.
func (l *lexer) lexStr() token {
	var s string
	if _, err := fmt.Fscanf(l.input, "%q", &s); err != nil {
		return token{itemError, err}
	}
	return token{itemStr, s}
}

// lexBytes scans a bytes literal: either whitespace-separated hex groups
// enclosed in brackets, e.g. [00ff 10], or URL-safe base64 enclosed in
// braces, e.g. {AQID}.
func (l *lexer) lexBytes() token {
	r := l.next()
	if r == '[' {
		var b []byte
		s := ""
		for {
			r = l.next()
			switch {
			case hexChar(r):
				s += string(r)
			case unicode.IsSpace(r) || r == ']':
				x, err := hex.DecodeString(s)
				if err != nil {
					return token{itemError, err}
				}
				b = append(b, x...)
				s = "" // start a new hex group; without this, earlier groups would be decoded again
				if r == ']' {
					return token{itemBytes, b}
				}
			default:
				return token{itemError, fmt.Errorf("expected bytes, found %q", s)}
			}
		}
	} else if r == '{' {
		s := ""
		for {
			r = l.next()
			switch {
			// '\r' and '\n' are accepted so encodings may be line-wrapped;
			// the base64 decoder ignores them.
			case lower(r) || upper(r) || digit(r) || r == '_' || r == '-' || r == '=' || r == '\r' || r == '\n':
				s += string(r)
			case r == '}':
				b, err := base64.URLEncoding.DecodeString(s)
				if err != nil {
					return token{itemError, err}
				}
				return token{itemBytes, b}
			default:
				return token{itemError, fmt.Errorf("expected base64w, found %q", s)}
			}
		}
	} else {
		return token{itemError, fmt.Errorf("expected '[' or '{', found %q", r)}
	}
}

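// A usage sketch (hypothetical, not part of the original file) of the two
// bytes literal forms: whitespace-separated hex groups in brackets, and
// URL-safe base64 in braces. Assumes "strings" is added to the imports.
func exampleLexBytes() {
	l := lex(strings.NewReader("[00ff 10] {AQID}"))
	fmt.Println(l.nextToken()) // Bytes{00ff10}
	fmt.Println(l.nextToken()) // Bytes{010203}
}
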
// lexInt scans an optionally signed decimal integer using fmt's %d verb.
func (l *lexer) lexInt() token {
	var i int64
	if _, err := fmt.Fscanf(l.input, "%d", &i); err != nil {
		return token{itemError, err}
	}
	return token{itemInt, i}
}

// lexKeyword scans a run of lowercase letters.
func (l *lexer) lexKeyword() token {
	// precondition: l.next() is [a-z]
	for {
		r := l.next()
		if !lower(r) {
			l.backup()
			return token{itemKeyword, l.reset()}
		}
	}
}

// lexIdentifier scans an uppercase letter followed by any run of letters,
// digits, and underscores.
func (l *lexer) lexIdentifier() token {
	// precondition: l.next() is [A-Z]
	for {
		r := l.next()
		if !(lower(r) || upper(r) || digit(r) || r == '_') {
			l.backup()
			return token{itemIdentifier, l.reset()}
		}
	}
}

func digit(r rune) bool {
	return '0' <= r && r <= '9'
}

func lower(r rune) bool {
	return 'a' <= r && r <= 'z'
}

func upper(r rune) bool {
	return 'A' <= r && r <= 'Z'
}

func hexChar(r rune) bool {
	return ('0' <= r && r <= '9') || ('a' <= r && r <= 'f') || ('A' <= r && r <= 'F')
}

// next returns the next rune in the input, or eof at end of input.
func (l *lexer) next() (r rune) {
	r, n, err := l.input.ReadRune()
	if err == io.EOF {
		l.width = 0
		return eof
	}
	l.val.WriteRune(r)
	// BUG(kwalsh) fmt.ScanState.ReadRune() returns incorrect length. See issue
	// 8512 here: https://code.google.com/p/go/issues/detail?id=8512
	n = utf8.RuneLen(r)
	l.width = n
	return r
}

// backup steps back one rune. Can be called only once per call of next.
func (l *lexer) backup() {
	if l.width > 0 {
		l.input.UnreadRune()
		l.val.Truncate(l.val.Len() - l.width)
		l.width = 0
	}
}

// reset returns the runes accumulated since the last reset and clears val
// and width.
func (l *lexer) reset() string {
	s := l.val.String()
	l.val.Reset()
	l.width = 0
	return s
}

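// A sketch (hypothetical, not part of the original file) of the
// next/backup/reset protocol: accumulate runes, back up past the first
// rune that does not match, then take the accumulated text. Assumes
// "strings" is added to the imports.
func exampleScanProtocol() {
	l := lex(strings.NewReader("abc("))
	for lower(l.next()) {
	}
	l.backup()             // un-read the '(' that ended the run
	fmt.Println(l.reset()) // prints "abc"
}
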
// lex creates a new scanner for the given input.
func lex(input reader) *lexer {
	return &lexer{input: input}
}

// nextToken returns the next token from the input.
func (l *lexer) nextToken() token {
	if l.done != nil {
		// only happens after itemEOF, itemError, or itemUnexpectedRune
		return *l.done
	}
	t := l.lexMain()
	l.reset()
	if t == tokenEOF || t.typ == itemError || t.typ == itemUnexpectedRune {
		l.done = &t
	}
	return t
}

// peek gets the next rune in the input without advancing the input.
func (l *lexer) peek() rune {
	r := l.next()
	l.backup()
	return r
}
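
// An end-to-end usage sketch (hypothetical, not part of the original file):
// tokenize a small formula and print each token until a terminal token
// (EOF, error, or unexpected rune) appears. Assumes "strings" is added to
// the imports.
func exampleLexer() {
	l := lex(strings.NewReader(`Key("abc") says Pred(1)`))
	for {
		t := l.nextToken()
		fmt.Println(t) // Identifier{"Key"}, Punct{'('}, Str{"abc"}, ..., EOF{}
		if t.typ == itemEOF || t.typ == itemError || t.typ == itemUnexpectedRune {
			break
		}
	}
}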