github.com/servernoj/jade@v0.0.0-20231225191405-efec98d19db1/lex.go

github.com/servernoj/jade@v0.0.0-20231225191405-efec98d19db1/lex.go (about)

     1  // Copyright 2011 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package jade
     6  
     7  import (
     8  	"fmt"
     9  	"strings"
    10  	"unicode"
    11  	"unicode/utf8"
    12  )
    13  
// item represents a token or text string returned from the scanner.
// Items are produced by lexer.emit and lexer.errorf and delivered to the
// consumer over the lexer's items channel.
type item struct {
	typ   itemType // The type of this item.
	pos   pos      // The starting position, in bytes, of this item in the input string.
	val   string   // The value of this item (for error items, the formatted message).
	line  int      // The lexer's line counter at the moment the item was sent.
	depth int      // The lexer's depth field at the moment the item was sent.
}
    22  
    23  func (i item) String() string {
    24  	switch {
    25  	case i.typ == itemEOF:
    26  		return "EOF"
    27  	case i.typ == itemError:
    28  		return i.val
    29  	// case i.typ > itemKeyword:
    30  	// 	return fmt.Sprintf("<%s>", i.val)
    31  	case len(i.val) > 10:
    32  		return fmt.Sprintf("%.10q...", i.val)
    33  	}
    34  	return fmt.Sprintf("%q", i.val)
    35  }
    36  
const (
	// eof is the sentinel rune returned by lexer.next once the input is
	// exhausted; it is negative so it cannot collide with a decoded rune.
	eof        = -1
	spaceChars = " \t\r\n" // These are the space characters defined by Go itself.
)
    41  
// stateFn represents the state of the scanner as a function that returns the next state.
// A nil stateFn ends the scan: lexer.run stops looping and closes the items channel.
type stateFn func(*lexer) stateFn
    44  
// lexer holds the state of the scanner.
// A lexer is created by lex, which also starts the scanning goroutine; the
// consumer reads the resulting items through nextItem (or discards them
// with drain).
type lexer struct {
	name  string    // the name of the input; used only for error reports
	input string    // the string being scanned
	pos   pos       // current position in the input
	start pos       // start position of this item
	width pos       // width of last rune read from input
	items chan item // channel of scanned items
	line  int       // 1+number of newlines seen

	depth         int  // current tag depth
	interpolation int  // interpolation depth
	longtext      bool // long text flag
}
    59  
    60  // next returns the next rune in the input.
    61  func (l *lexer) next() rune {
    62  	if int(l.pos) >= len(l.input) {
    63  		l.width = 0
    64  		return eof
    65  	}
    66  	r, w := utf8.DecodeRuneInString(l.input[l.pos:])
    67  	l.width = pos(w)
    68  	l.pos += l.width
    69  	if r == '\n' {
    70  		l.line++
    71  	}
    72  	return r
    73  }
    74  
    75  // peek returns but does not consume the next rune in the input.
    76  func (l *lexer) peek() rune {
    77  	r := l.next()
    78  	l.backup()
    79  	return r
    80  }
    81  
    82  // backup steps back one rune. Can only be called once per call of next.
    83  func (l *lexer) backup() {
    84  	l.pos -= l.width
    85  	// Correct newline count.
    86  	if l.width == 1 && l.input[l.pos] == '\n' {
    87  		l.line--
    88  	}
    89  }
    90  
    91  // emit passes an item back to the client.
    92  func (l *lexer) emit(t itemType) {
    93  	l.items <- item{t, l.start, l.input[l.start:l.pos], l.line, l.depth}
    94  	// Some items contain text internally. If so, count their newlines.
    95  	switch t {
    96  	// case itemText, itemRawString, itemLeftDelim, itemRightDelim:
    97  	case itemText:
    98  		l.line += strings.Count(l.input[l.start:l.pos], "\n")
    99  	}
   100  	l.start = l.pos
   101  }
   102  
   103  // ignore skips over the pending input before this point.
   104  func (l *lexer) ignore() {
   105  	l.line += strings.Count(l.input[l.start:l.pos], "\n")
   106  	l.start = l.pos
   107  }
   108  
   109  // accept consumes the next rune if it's from the valid set.
   110  func (l *lexer) accept(valid string) bool {
   111  	if strings.ContainsRune(valid, l.next()) {
   112  		return true
   113  	}
   114  	l.backup()
   115  	return false
   116  }
   117  
   118  // acceptRun consumes a run of runes from the valid set.
   119  func (l *lexer) acceptRun(valid string) {
   120  	for strings.ContainsRune(valid, l.next()) {
   121  	}
   122  	l.backup()
   123  }
   124  
   125  // errorf returns an error token and terminates the scan by passing
   126  // back a nil pointer that will be the next state, terminating l.nextItem.
   127  func (l *lexer) errorf(format string, args ...interface{}) stateFn {
   128  	l.items <- item{itemError, l.start, fmt.Sprintf(format, args...), l.line, l.depth}
   129  	return nil
   130  }
   131  
   132  // nextItem returns the next item from the input.
   133  // Called by the parser, not in the lexing goroutine.
   134  func (l *lexer) nextItem() item {
   135  	return <-l.items
   136  }
   137  
   138  // drain drains the output so the lexing goroutine will exit.
   139  // Called by the parser, not in the lexing goroutine.
   140  func (l *lexer) drain() {
   141  	for range l.items {
   142  	}
   143  }
   144  
   145  // lex creates a new scanner for the input string.
   146  func lex(name string, input []byte) *lexer {
   147  	l := &lexer{
   148  		name:  name,
   149  		input: string(input),
   150  		items: make(chan item),
   151  		line:  1,
   152  	}
   153  	go l.run()
   154  	return l
   155  }
   156  
   157  func (l *lexer) run() {
   158  	for state := lexIndents; state != nil; {
   159  		state = state(l)
   160  	}
   161  	close(l.items)
   162  }
   163  
   164  // atTerminator reports whether the input is at valid termination character to
   165  // appear after an identifier. Breaks .X.Y into two pieces. Also catches cases
   166  // like "$x+2" not being acceptable without a space, in case we decide one
   167  // day to implement arithmetic.
   168  func (l *lexer) atTerminator() bool {
   169  	r := l.peek()
   170  	if isSpace(r) || isEndOfLine(r) {
   171  		return true
   172  	}
   173  	switch r {
   174  	case eof, '.', ',', '|', ':', ')', '(':
   175  		return true
   176  	}
   177  
   178  	return false
   179  }
   180  
   181  func (l *lexer) scanNumber() bool {
   182  	// Optional leading sign.
   183  	l.accept("+-")
   184  	// Is it hex?
   185  	digits := "0123456789"
   186  	if l.accept("0") && l.accept("xX") {
   187  		digits = "0123456789abcdefABCDEF"
   188  	}
   189  	l.acceptRun(digits)
   190  	if l.accept(".") {
   191  		l.acceptRun(digits)
   192  	}
   193  	if l.accept("eE") {
   194  		l.accept("+-")
   195  		l.acceptRun("0123456789")
   196  	}
   197  	// Is it imaginary?
   198  	l.accept("i")
   199  	// Next thing mustn't be alphanumeric.
   200  	if isAlphaNumeric(l.peek()) {
   201  		l.next()
   202  		return false
   203  	}
   204  	return true
   205  }
   206  
   207  // isSpace reports whether r is a space character.
   208  func isSpace(r rune) bool {
   209  	return r == ' ' || r == '\t'
   210  }
   211  
   212  // isEndOfLine reports whether r is an end-of-line character.
   213  func isEndOfLine(r rune) bool {
   214  	return r == '\r' || r == '\n'
   215  }
   216  
   217  // isAlphaNumeric reports whether r is an alphabetic, digit, or underscore.
   218  func isAlphaNumeric(r rune) bool {
   219  	return r == '_' || r == '-' || unicode.IsLetter(r) || unicode.IsDigit(r)
   220  }