github.com/emcfarlane/larking@v0.0.0-20220605172417-1704b45ee6c3/lexer.go

// Copyright 2021 Edward McFarlane. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package larking

import (
	"fmt"
	"strings"
	"unicode"
	"unicode/utf8"
)

// ### Path template syntax
//
//     Template = "/" Segments [ Verb ] ;
//     Segments = Segment { "/" Segment } ;
//     Segment  = "*" | "**" | LITERAL | Variable ;
//     Variable = "{" FieldPath [ "=" Segments ] "}" ;
//     FieldPath = IDENT { "." IDENT } ;
//     Verb     = ":" LITERAL ;
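//
// For illustration, the template "/v1/messages/{message_id}" lexes to the
// token stream: Slash "/", Value "v1", Slash "/", Value "messages",
// Slash "/", VariableStart "{", Value "message_id", VariableEnd "}", EOF.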

type tokenType int

const (
	tokenError         = iota
	tokenSlash         // /
	tokenStar          // *
	tokenStarStar      // **
	tokenVariableStart // {
	tokenVariableEnd   // }
	tokenEqual         // =
	tokenValue         // a-z A-Z 0-9 - _
	tokenDot           // .
	tokenVerb          // :
	tokenPath          // a-z A-Z 0-9 . - _ ~ ! $ & ' ( ) * + , ; = @
	tokenEOF
)

// token is a single lexed element of a path template or request path.
type token struct {
	typ tokenType
	val string
}

func (t token) String() string {
	return fmt.Sprintf("(%d) %s", t.typ, t.val)
}

// tokens is an ordered stream of lexed tokens.
type tokens []token

// String concatenates the token values, reassembling the lexed input.
func (toks tokens) String() string {
	var b strings.Builder
	for _, tok := range toks {
		b.WriteString(tok.val)
	}
	return b.String()
}

// index returns the position of the first token of type typ, or -1.
func (toks tokens) index(typ tokenType) int {
	for i, tok := range toks {
		if tok.typ == typ {
			return i
		}
	}
	return -1
}

// indexAny returns the position of the first token whose type is in set, or -1.
func (toks tokens) indexAny(set tokenSet) int {
	for i, tok := range toks {
		if set.has(tok.typ) {
			return i
		}
	}
	return -1
}

type lexer struct {
	input string // string being scanned
	start int    // start position of the pending token
	pos   int    // current position in the input
	width int    // width of the last rune read by next

	toks tokens // tokens emitted so far
}

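// tokenSet is a bitmask over tokenType, used to match any one of several
// token types at once. For example (illustrative only),
// toks.indexAny(newTokenSet(tokenVerb, tokenEOF)) finds the first verb or
// end-of-input token in a token stream.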
type tokenSet uint64

func (s tokenSet) has(typ tokenType) bool { return s&(1<<uint64(typ)) != 0 }

func newTokenSet(typs ...tokenType) (s tokenSet) {
	for _, typ := range typs {
		s |= 1 << uint(typ)
	}
	return s
}

// eof is returned by next when the input is exhausted.
const eof = -1

// next returns the next rune in the input, or eof when the input is exhausted.
func (l *lexer) next() (r rune) {
	if l.pos >= len(l.input) {
		l.width = 0
		return eof
	}
	r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
	l.pos += l.width
	return r
}

// current returns the rune most recently read by next.
func (l *lexer) current() (r rune) {
	if l.width == 0 {
		return 0
	} else if l.pos > l.width {
		r, _ = utf8.DecodeRuneInString(l.input[l.pos-l.width:])
	} else {
		r, _ = utf8.DecodeRuneInString(l.input)
	}
	return r
}

// backup steps back one rune; it may only be called once per call of next.
func (l *lexer) backup() {
	l.pos -= l.width
}

// acceptRun consumes a run of runes satisfying isValid and reports how many
// were consumed.
func (l *lexer) acceptRun(isValid func(r rune) bool) int {
	var i int
	for isValid(l.next()) {
		i++
	}
	l.backup()
	return i
}

// emit appends a token of the given type for the pending input and advances
// the start position.
func (l *lexer) emit(typ tokenType) {
	tok := token{typ: typ, val: l.input[l.start:l.pos]}
	l.toks = append(l.toks, tok)
	l.start = l.pos
}

func (l *lexer) errUnexpected() error {
	l.emit(tokenError)
	r := l.current()
	return fmt.Errorf("%v:%v unexpected rune %q", l.pos-l.width, l.pos, r)
}

func (l *lexer) errShort() error {
	l.emit(tokenError)
	r := l.current()
	return fmt.Errorf("%v:%v short read %q", l.pos-l.width, l.pos, r)
}

func isValue(r rune) bool {
	return unicode.IsLetter(r) || unicode.IsNumber(r) || r == '_' || r == '-'
}

var isPathRune = func() map[rune]bool {
	m := make(map[rune]bool)
	for _, r := range ".-_~!$&'()*+,;=@" {
		m[r] = true
	}
	return m
}()

func isPath(r rune) bool {
	return isValue(r) || isPathRune[r]
}

func lexValue(l *lexer) error {
	if i := l.acceptRun(isValue); i == 0 {
		return l.errShort()
	}
	l.emit(tokenValue)
	return nil
}

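// lexFieldPath lexes a dotted field path. For illustration, "book.name"
// lexes to: Value "book", Dot ".", Value "name".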
func lexFieldPath(l *lexer) error {
	if err := lexValue(l); err != nil {
		return err
	}
	for {
		if r := l.next(); r != '.' {
			l.backup() // not a '.', leave it for the caller
			return nil
		}
		l.emit(tokenDot)
		if err := lexValue(l); err != nil {
			return err
		}
	}
}

// lexVerb lexes the verb literal following ':' and requires it to end the input.
func lexVerb(l *lexer) error {
	if err := lexValue(l); err != nil {
		return err
	}
	if r := l.next(); r == eof {
		l.emit(tokenEOF)
		return nil
	}
	return l.errUnexpected()
}

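// lexVariable lexes a "{FieldPath}" or "{FieldPath=Segments}" variable. For
// illustration, "{name=shelves/*}" lexes to: VariableStart "{", Value "name",
// Equal "=", Value "shelves", Slash "/", Star "*", VariableEnd "}".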
func lexVariable(l *lexer) error {
	r := l.next()
	if r != '{' {
		return l.errUnexpected()
	}
	l.emit(tokenVariableStart)
	if err := lexFieldPath(l); err != nil {
		return err
	}

	r = l.next()
	if r == '=' {
		l.emit(tokenEqual)

		if err := lexSegments(l); err != nil {
			return err
		}
		r = l.next()
	}

	if r != '}' {
		return l.errUnexpected()
	}
	l.emit(tokenVariableEnd)
	return nil
}

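// lexSegment lexes a single segment: a literal such as "books" (Value),
// "*" (Star), "**" (StarStar), or a "{...}" variable.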
func lexSegment(l *lexer) error {
	r := l.next()
	switch {
	case unicode.IsLetter(r):
		if i := l.acceptRun(isValue); i == 0 {
			return l.errShort()
		}
		l.emit(tokenValue)
		return nil
	case r == '*':
		rn := l.next()
		if rn == '*' {
			l.emit(tokenStarStar)
			return nil
		}
		l.backup()
		l.emit(tokenStar)
		return nil
	case r == '{':
		l.backup()
		return lexVariable(l)
	default:
		return l.errUnexpected()
	}
}

func lexSegments(l *lexer) error {
	for {
		if err := lexSegment(l); err != nil {
			return err
		}
		if r := l.next(); r != '/' {
			l.backup() // not a '/', leave it for the caller
			return nil
		}
		l.emit(tokenSlash)
	}
}

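// lexTemplate lexes a full path template per the grammar above. A minimal
// usage sketch:
//
//	l := &lexer{input: "/v1/{name=shelves/*}:get"}
//	err := lexTemplate(l)
//
// On success l.toks holds: Slash, Value "v1", Slash, VariableStart,
// Value "name", Equal, Value "shelves", Slash, Star, VariableEnd,
// Verb ":", Value "get", EOF.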
func lexTemplate(l *lexer) error {
	if r := l.next(); r != '/' {
		return l.errUnexpected()
	}
	l.emit(tokenSlash)
	if err := lexSegments(l); err != nil {
		return err
	}

	switch r := l.next(); r {
	case ':':
		l.emit(tokenVerb)
		return lexVerb(l)
	case eof:
		l.emit(tokenEOF)
		return nil
	default:
		return l.errUnexpected()
	}
}

// lexPathSegment lexes a run of path runes between separators.
func lexPathSegment(l *lexer) error {
	if i := l.acceptRun(isPath); i == 0 {
		return l.errShort()
	}
	l.emit(tokenPath)
	return nil
}

// lexPath lexes a request path: '/' emits tokenSlash, ':' emits tokenVerb,
// and the runes between separators are emitted as tokenPath.
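// For illustration, "/v1/messages/msg_123:cancel" lexes to: Slash, Path "v1",
// Slash, Path "messages", Slash, Path "msg_123", Verb ":", Path "cancel", EOF.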
func lexPath(l *lexer) error {
	for {
		switch r := l.next(); r {
		case '/':
			l.emit(tokenSlash)
			if err := lexPathSegment(l); err != nil {
				return err
			}
		case ':':
			l.emit(tokenVerb)
			if err := lexPathSegment(l); err != nil {
				return err
			}
		case eof:
			l.emit(tokenEOF)
			return nil
		default:
			return l.errUnexpected()
		}
	}
}