github.com/graybobo/golang.org-package-offline-cache@v0.0.0-20200626051047-6608995c132f/x/talks/2011/lex/r59-lex.go (about)

     1  // Copyright 2011 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // +build ignore
     6  
     7  package template
     8  
     9  import (
    10  	"fmt"
    11  	"strings"
    12  	"unicode"
    13  	"utf8"
    14  )
    15  
// item represents a token or text string returned from the scanner.
// Items are built positionally (item{typ, val}) by emit and errorf,
// so the field order matters.
type item struct {
	typ itemType // kind of token
	val string   // scanned input text, or the message for itemError
}
    21  
    22  func (i item) String() string {
    23  	switch {
    24  	case i.typ == itemEOF:
    25  		return "EOF"
    26  	case i.typ == itemError:
    27  		return i.val
    28  	case i.typ > itemKeyword:
    29  		return fmt.Sprintf("<%s>", i.val)
    30  	case len(i.val) > 10:
    31  		return fmt.Sprintf("%.10q...", i.val)
    32  	}
    33  	return fmt.Sprintf("%q", i.val)
    34  }
    35  
// itemType identifies the type of lex items. Its values are the item*
// constants; those declared after itemKeyword are keywords.
type itemType int
    38  
// The item types. The values come from iota, so declaration order is
// significant: code tests "typ > itemKeyword" to recognise keywords.
// itemEOF is emitted by lexText once the whole input has been consumed.
const (
	itemError   itemType = iota // error occurred; value is text of error
	itemBool                    // boolean constant
	itemComplex                 // complex constant (1+2i); imaginary is just a number
	itemEOF
	itemField      // alphanumeric identifier, starting with '.', possibly chained ('.x.y')
	itemIdentifier // alphanumeric identifier
	itemLeftDelim  // left action delimiter
	itemNumber     // simple number, including imaginary
	itemPipe       // pipe symbol
	itemRawString  // raw quoted string (includes quotes)
	itemRightDelim // right action delimiter
	itemString     // quoted string (includes quotes)
	itemText       // plain text
	// Keywords appear after all the rest.
	itemKeyword  // used only to delimit the keywords
	itemDot      // the cursor, spelled '.'.
	itemDefine   // define keyword
	itemElse     // else keyword
	itemEnd      // end keyword
	itemIf       // if keyword
	itemRange    // range keyword
	itemTemplate // template keyword
	itemWith     // with keyword
)
    64  
    65  // Make the types prettyprint.
    66  var itemName = map[itemType]string{
    67  	itemError:      "error",
    68  	itemBool:       "bool",
    69  	itemComplex:    "complex",
    70  	itemEOF:        "EOF",
    71  	itemField:      "field",
    72  	itemIdentifier: "identifier",
    73  	itemLeftDelim:  "left delim",
    74  	itemNumber:     "number",
    75  	itemPipe:       "pipe",
    76  	itemRawString:  "raw string",
    77  	itemRightDelim: "right delim",
    78  	itemString:     "string",
    79  	// keywords
    80  	itemDot:      ".",
    81  	itemDefine:   "define",
    82  	itemElse:     "else",
    83  	itemIf:       "if",
    84  	itemEnd:      "end",
    85  	itemRange:    "range",
    86  	itemTemplate: "template",
    87  	itemWith:     "with",
    88  }
    89  
    90  func (i itemType) String() string {
    91  	s := itemName[i]
    92  	if s == "" {
    93  		return fmt.Sprintf("item%d", int(i))
    94  	}
    95  	return s
    96  }
    97  
// key maps the spelling of each keyword (and the lone ".") to its item
// type. lexIdentifier looks scanned words up here; a word that is not
// present yields the zero itemType, which does not exceed itemKeyword.
var key = map[string]itemType{
	".":        itemDot,
	"define":   itemDefine,
	"else":     itemElse,
	"end":      itemEnd,
	"if":       itemIf,
	"range":    itemRange,
	"template": itemTemplate,
	"with":     itemWith,
}
   108  
// eof is the value next returns once the input is exhausted.
const eof = -1
   110  
// stateFn represents the state of the scanner as a function that returns the next state.
// A nil stateFn means the scan is over (EOF was emitted or an error was reported).
type stateFn func(*lexer) stateFn
   113  
// lexer holds the state of the scanner. The pending item is always the
// text input[start:pos]; emit sends it and ignore discards it.
type lexer struct {
	name  string    // the name of the input; used only for error reports.
	input string    // the string being scanned.
	state stateFn   // the next lexing function to enter
	pos   int       // current position in the input.
	start int       // start position of this item.
	width int       // width of last rune read from input.
	items chan item // channel of scanned items.
}
   124  
   125  // next returns the next rune in the input.
   126  func (l *lexer) next() (rune int) {
   127  	if l.pos >= len(l.input) {
   128  		l.width = 0
   129  		return eof
   130  	}
   131  	rune, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
   132  	l.pos += l.width
   133  	return rune
   134  }
   135  
   136  // peek returns but does not consume the next rune in the input.
   137  func (l *lexer) peek() int {
   138  	rune := l.next()
   139  	l.backup()
   140  	return rune
   141  }
   142  
   143  // backup steps back one rune. Can only be called once per call of next.
   144  func (l *lexer) backup() {
   145  	l.pos -= l.width
   146  }
   147  
   148  // emit passes an item back to the client.
   149  func (l *lexer) emit(t itemType) {
   150  	l.items <- item{t, l.input[l.start:l.pos]}
   151  	l.start = l.pos
   152  }
   153  
// ignore skips over the pending input before this point: the next item
// will start at the current position and the skipped text is never emitted.
func (l *lexer) ignore() {
	l.start = l.pos
}
   158  
   159  // accept consumes the next rune if it's from the valid set.
   160  func (l *lexer) accept(valid string) bool {
   161  	if strings.IndexRune(valid, l.next()) >= 0 {
   162  		return true
   163  	}
   164  	l.backup()
   165  	return false
   166  }
   167  
   168  // acceptRun consumes a run of runes from the valid set.
   169  func (l *lexer) acceptRun(valid string) {
   170  	for strings.IndexRune(valid, l.next()) >= 0 {
   171  	}
   172  	l.backup()
   173  }
   174  
   175  // lineNumber reports which line we're on. Doing it this way
   176  // means we don't have to worry about peek double counting.
   177  func (l *lexer) lineNumber() int {
   178  	return 1 + strings.Count(l.input[:l.pos], "\n")
   179  }
   180  
   181  // error returns an error token and terminates the scan by passing
   182  // back a nil pointer that will be the next state, terminating l.run.
   183  func (l *lexer) errorf(format string, args ...interface{}) stateFn {
   184  	l.items <- item{itemError, fmt.Sprintf(format, args...)}
   185  	return nil
   186  }
   187  
   188  // nextItem returns the next item from the input.
   189  func (l *lexer) nextItem() item {
   190  	for {
   191  		select {
   192  		case item := <-l.items:
   193  			return item
   194  		default:
   195  			l.state = l.state(l)
   196  		}
   197  	}
   198  	panic("not reached")
   199  }
   200  
   201  // lex creates a new scanner for the input string.
   202  func lex(name, input string) *lexer {
   203  	l := &lexer{
   204  		name:  name,
   205  		input: input,
   206  		state: lexText,
   207  		items: make(chan item, 2), // Two items sufficient.
   208  	}
   209  	return l
   210  }
   211  
   212  // state functions
   213  
// Markers recognised in the input. A comment is a left delimiter
// immediately followed by "/*" and is closed by "*/" immediately
// followed by the right delimiter.
const (
	leftDelim    = "{{"
	rightDelim   = "}}"
	leftComment  = "{{/*"
	rightComment = "*/}}"
)
   220  
   221  // lexText scans until an opening action delimiter, "{{".
   222  func lexText(l *lexer) stateFn {
   223  	for {
   224  		if strings.HasPrefix(l.input[l.pos:], leftDelim) {
   225  			if l.pos > l.start {
   226  				l.emit(itemText)
   227  			}
   228  			return lexLeftDelim
   229  		}
   230  		if l.next() == eof {
   231  			break
   232  		}
   233  	}
   234  	// Correctly reached EOF.
   235  	if l.pos > l.start {
   236  		l.emit(itemText)
   237  	}
   238  	l.emit(itemEOF)
   239  	return nil
   240  }
   241  
   242  // lexLeftDelim scans the left delimiter, which is known to be present.
   243  func lexLeftDelim(l *lexer) stateFn {
   244  	if strings.HasPrefix(l.input[l.pos:], leftComment) {
   245  		return lexComment
   246  	}
   247  	l.pos += len(leftDelim)
   248  	l.emit(itemLeftDelim)
   249  	return lexInsideAction
   250  }
   251  
   252  // lexComment scans a comment. The left comment marker is known to be present.
   253  func lexComment(l *lexer) stateFn {
   254  	i := strings.Index(l.input[l.pos:], rightComment)
   255  	if i < 0 {
   256  		return l.errorf("unclosed comment")
   257  	}
   258  	l.pos += i + len(rightComment)
   259  	l.ignore()
   260  	return lexText
   261  }
   262  
   263  // lexRightDelim scans the right delimiter, which is known to be present.
   264  func lexRightDelim(l *lexer) stateFn {
   265  	l.pos += len(rightDelim)
   266  	l.emit(itemRightDelim)
   267  	return lexText
   268  }
   269  
   270  // lexInsideAction scans the elements inside action delimiters.
   271  func lexInsideAction(l *lexer) stateFn {
   272  	// Either number, quoted string, or identifier.
   273  	// Spaces separate and are ignored.
   274  	// Pipe symbols separate and are emitted.
   275  	for {
   276  		if strings.HasPrefix(l.input[l.pos:], rightDelim) {
   277  			return lexRightDelim
   278  		}
   279  		switch r := l.next(); {
   280  		case r == eof || r == '\n':
   281  			return l.errorf("unclosed action")
   282  		case isSpace(r):
   283  			l.ignore()
   284  		case r == '|':
   285  			l.emit(itemPipe)
   286  		case r == '"':
   287  			return lexQuote
   288  		case r == '`':
   289  			return lexRawQuote
   290  		case r == '.':
   291  			// special look-ahead for ".field" so we don't break l.backup().
   292  			if l.pos < len(l.input) {
   293  				r := l.input[l.pos]
   294  				if r < '0' || '9' < r {
   295  					return lexIdentifier // itemDot comes from the keyword table.
   296  				}
   297  			}
   298  			fallthrough // '.' can start a number.
   299  		case r == '+' || r == '-' || ('0' <= r && r <= '9'):
   300  			l.backup()
   301  			return lexNumber
   302  		case isAlphaNumeric(r):
   303  			l.backup()
   304  			return lexIdentifier
   305  		default:
   306  			return l.errorf("unrecognized character in action: %#U", r)
   307  		}
   308  	}
   309  	return nil
   310  }
   311  
   312  // lexIdentifier scans an alphanumeric or field.
   313  func lexIdentifier(l *lexer) stateFn {
   314  Loop:
   315  	for {
   316  		switch r := l.next(); {
   317  		case isAlphaNumeric(r):
   318  			// absorb.
   319  		case r == '.' && l.input[l.start] == '.':
   320  			// field chaining; absorb into one token.
   321  		default:
   322  			l.backup()
   323  			word := l.input[l.start:l.pos]
   324  			switch {
   325  			case key[word] > itemKeyword:
   326  				l.emit(key[word])
   327  			case word[0] == '.':
   328  				l.emit(itemField)
   329  			case word == "true", word == "false":
   330  				l.emit(itemBool)
   331  			default:
   332  				l.emit(itemIdentifier)
   333  			}
   334  			break Loop
   335  		}
   336  	}
   337  	return lexInsideAction
   338  }
   339  
   340  // lexNumber scans a number: decimal, octal, hex, float, or imaginary.  This
   341  // isn't a perfect number scanner - for instance it accepts "." and "0x0.2"
   342  // and "089" - but when it's wrong the input is invalid and the parser (via
   343  // strconv) will notice.
   344  func lexNumber(l *lexer) stateFn {
   345  	if !l.scanNumber() {
   346  		return l.errorf("bad number syntax: %q", l.input[l.start:l.pos])
   347  	}
   348  	if sign := l.peek(); sign == '+' || sign == '-' {
   349  		// Complex: 1+2i.  No spaces, must end in 'i'.
   350  		if !l.scanNumber() || l.input[l.pos-1] != 'i' {
   351  			return l.errorf("bad number syntax: %q", l.input[l.start:l.pos])
   352  		}
   353  		l.emit(itemComplex)
   354  	} else {
   355  		l.emit(itemNumber)
   356  	}
   357  	return lexInsideAction
   358  }
   359  
   360  func (l *lexer) scanNumber() bool {
   361  	// Optional leading sign.
   362  	l.accept("+-")
   363  	// Is it hex?
   364  	digits := "0123456789"
   365  	if l.accept("0") && l.accept("xX") {
   366  		digits = "0123456789abcdefABCDEF"
   367  	}
   368  	l.acceptRun(digits)
   369  	if l.accept(".") {
   370  		l.acceptRun(digits)
   371  	}
   372  	if l.accept("eE") {
   373  		l.accept("+-")
   374  		l.acceptRun("0123456789")
   375  	}
   376  	// Is it imaginary?
   377  	l.accept("i")
   378  	// Next thing mustn't be alphanumeric.
   379  	if isAlphaNumeric(l.peek()) {
   380  		l.next()
   381  		return false
   382  	}
   383  	return true
   384  }
   385  
   386  // lexQuote scans a quoted string.
   387  func lexQuote(l *lexer) stateFn {
   388  Loop:
   389  	for {
   390  		switch l.next() {
   391  		case '\\':
   392  			if r := l.next(); r != eof && r != '\n' {
   393  				break
   394  			}
   395  			fallthrough
   396  		case eof, '\n':
   397  			return l.errorf("unterminated quoted string")
   398  		case '"':
   399  			break Loop
   400  		}
   401  	}
   402  	l.emit(itemString)
   403  	return lexInsideAction
   404  }
   405  
   406  // lexRawQuote scans a raw quoted string.
   407  func lexRawQuote(l *lexer) stateFn {
   408  Loop:
   409  	for {
   410  		switch l.next() {
   411  		case eof, '\n':
   412  			return l.errorf("unterminated raw quoted string")
   413  		case '`':
   414  			break Loop
   415  		}
   416  	}
   417  	l.emit(itemRawString)
   418  	return lexInsideAction
   419  }
   420  
   421  // isSpace reports whether r is a space character.
   422  func isSpace(r int) bool {
   423  	switch r {
   424  	case ' ', '\t', '\n', '\r':
   425  		return true
   426  	}
   427  	return false
   428  }
   429  
   430  // isAlphaNumeric reports whether r is an alphabetic, digit, or underscore.
   431  func isAlphaNumeric(r int) bool {
   432  	return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r)
   433  }