bosun.org@v0.0.0-20210513094433-e25bc3e69a1f/cmd/bosun/conf/rule/parse/lex.go (about)

     1  // Copyright 2011 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package parse
     6  
     7  import (
     8  	"fmt"
     9  	"strings"
    10  	"unicode"
    11  	"unicode/utf8"
    12  )
    13  
// item represents a token or text string returned from the scanner.
// Items are built by the lexer (see emit and errorf) and handed to the
// consumer one at a time through nextItem.
type item struct {
	typ itemType // The type of this item.
	pos Pos      // The starting position, in bytes, of this item in the input string.
	val string   // The value of this item: the scanned text, or the error message for itemError.
}
    20  
    21  func (i item) String() string {
    22  	switch {
    23  	case i.typ == itemEOF:
    24  		return "EOF"
    25  	case i.typ == itemError:
    26  		return i.val
    27  	case len(i.val) > 10:
    28  		return fmt.Sprintf("%.10q...", i.val)
    29  	}
    30  	return fmt.Sprintf("%q", i.val)
    31  }
    32  
// itemType identifies the type of lex items. The concrete values are the
// item* constants declared below.
type itemType int
    35  
// The kinds of item the lexer can produce. The values are assigned with
// iota, so itemError is the zero value of itemType and the declaration
// order is significant.
const (
	itemError itemType = iota // error occurred; value is text of error
	itemEOF                   // end of input
	itemEqual                // '='
	itemLeftDelim            // '{'
	itemRawString            // raw string (includes quotes)
	itemIdentifier           // identifier for section and value names
	itemRightDelim           // '}'
	itemString               // string (excluding prefix whitespace and EOL or NL at EOL)
	itemSubsectionIdentifier // identifier for subsection names
)
    47  
// eof is the sentinel value returned by lexer.next once the input has been
// fully consumed. It is negative so it cannot collide with a decoded rune.
const eof = -1
    49  
// stateFn represents the state of the scanner as a function that returns the next state.
// A nil stateFn ends the loop in lexer.run and thereby stops the scan.
type stateFn func(*lexer) stateFn
    52  
// lexer holds the state of the scanner.
// The half-open byte range [start, pos) of input is the text of the item
// currently being scanned; emit sends it on items and ignore discards it.
type lexer struct {
	name    string    // the name of the input; used only for error reports
	input   string    // the string being scanned
	state   stateFn   // the next lexing function to enter
	pos     Pos       // current position in the input
	start   Pos       // start position of this item
	width   Pos       // width of last rune read from input
	lastPos Pos       // position of most recent item returned by nextItem
	items   chan item // channel of scanned items
}
    64  
    65  // next returns the next rune in the input.
    66  func (l *lexer) next() rune {
    67  	if int(l.pos) >= len(l.input) {
    68  		l.width = 0
    69  		return eof
    70  	}
    71  	r, w := utf8.DecodeRuneInString(l.input[l.pos:])
    72  	l.width = Pos(w)
    73  	l.pos += l.width
    74  	return r
    75  }
    76  
    77  // peek returns but does not consume the next rune in the input.
    78  func (l *lexer) peek() rune {
    79  	r := l.next()
    80  	l.backup()
    81  	return r
    82  }
    83  
    84  // backup steps back one rune. Can only be called once per call of next.
    85  func (l *lexer) backup() {
    86  	l.pos -= l.width
    87  }
    88  
    89  // emit passes an item back to the client.
    90  func (l *lexer) emit(t itemType) {
    91  	l.items <- item{t, l.start, l.input[l.start:l.pos]}
    92  	l.start = l.pos
    93  }
    94  
    95  // ignore skips over the pending input before this point.
    96  func (l *lexer) ignore() {
    97  	l.start = l.pos
    98  }
    99  
   100  // lineNumber reports which line we're on, based on the position of
   101  // the previous item returned by nextItem. Doing it this way
   102  // means we don't have to worry about peek double counting.
   103  func (l *lexer) lineNumber() int {
   104  	return 1 + strings.Count(l.input[:l.lastPos], "\n")
   105  }
   106  
   107  // errorf returns an error token and terminates the scan by passing
   108  // back a nil pointer that will be the next state, terminating l.nextItem.
   109  func (l *lexer) errorf(format string, args ...interface{}) stateFn {
   110  	l.items <- item{itemError, l.start, fmt.Sprintf(format, args...)}
   111  	return nil
   112  }
   113  
   114  // nextItem returns the next item from the input.
   115  func (l *lexer) nextItem() item {
   116  	item := <-l.items
   117  	l.lastPos = item.pos
   118  	return item
   119  }
   120  
   121  // lex creates a new scanner for the input string.
   122  func lex(name, input string) *lexer {
   123  	l := &lexer{
   124  		name:  name,
   125  		input: input,
   126  		items: make(chan item),
   127  	}
   128  	go l.run()
   129  	return l
   130  }
   131  
   132  // run runs the state machine for the lexer.
   133  func (l *lexer) run() {
   134  	for l.state = lexSpace; l.state != nil; {
   135  		l.state = l.state(l)
   136  	}
   137  }
   138  
   139  // state functions
   140  
// Characters with special meaning to the state functions. leftDelim,
// rightDelim and equal are emitted as itemLeftDelim, itemRightDelim and
// itemEqual; comment starts a comment that lexComment skips up to and
// including the next newLine.
const (
	leftDelim  = '{'
	rightDelim = '}'
	equal      = '='
	comment    = '#'
	newLine    = "\n"
)
   148  
   149  // lexSpace scans until start of section or value
   150  func lexSpace(l *lexer) stateFn {
   151  Loop:
   152  	for {
   153  		switch r := l.next(); {
   154  		case r == leftDelim:
   155  			return lexLeftDelim
   156  		case r == rightDelim:
   157  			return lexRightDelim
   158  		case r == equal:
   159  			return lexEqual
   160  		case isVarchar(r):
   161  			l.backup()
   162  			return lexValue
   163  		case isSpace(r) || isEndOfLine(r):
   164  			l.ignore()
   165  		case r == eof:
   166  			l.emit(itemEOF)
   167  			break Loop
   168  		case r == comment:
   169  			return lexComment
   170  		default:
   171  			return l.errorf("invalid character: %v", string(r))
   172  		}
   173  	}
   174  	return nil
   175  }
   176  
   177  func lexComment(l *lexer) stateFn {
   178  	i := strings.Index(l.input[l.pos:], newLine)
   179  	if i < 0 {
   180  		l.emit(itemEOF)
   181  		return nil
   182  	}
   183  	l.pos += Pos(i + len(newLine))
   184  	l.ignore()
   185  	return lexSpace
   186  }
   187  
   188  func lexLeftDelim(l *lexer) stateFn {
   189  	l.emit(itemLeftDelim)
   190  	return lexSpace
   191  }
   192  
   193  func lexRightDelim(l *lexer) stateFn {
   194  	l.emit(itemRightDelim)
   195  	return lexSpace
   196  }
   197  
   198  func lexValue(l *lexer) stateFn {
   199  	l.ignore()
   200  	for {
   201  		switch r := l.next(); {
   202  		case isVarchar(r):
   203  			// absorb
   204  		default:
   205  			l.backup()
   206  			l.emit(itemIdentifier)
   207  			return lexValueNext
   208  		}
   209  	}
   210  }
   211  
   212  func lexValueNext(l *lexer) stateFn {
   213  	for {
   214  		switch r := l.next(); {
   215  		case isSpace(r) || isEndOfLine(r):
   216  			l.ignore()
   217  		case r == equal:
   218  			return lexEqual
   219  		case isSubsectionChar(r):
   220  			l.backup()
   221  			return lexSubsection
   222  		default:
   223  			return l.errorf("invalid character: %v", string(r))
   224  		}
   225  	}
   226  }
   227  
   228  func lexSubsection(l *lexer) stateFn {
   229  Loop:
   230  	for {
   231  		switch r := l.next(); {
   232  		case isSubsectionChar(r):
   233  			// absorb
   234  		default:
   235  			l.backup()
   236  			break Loop
   237  		}
   238  	}
   239  	l.emit(itemSubsectionIdentifier)
   240  	return lexSpace
   241  }
   242  
   243  func isSubsectionChar(r rune) bool {
   244  	return isVarchar(r) || r == '*' || r == ',' || r == '=' || r == '|'
   245  }
   246  
   247  func lexEqual(l *lexer) stateFn {
   248  	l.emit(itemEqual)
   249  	for isSpace(l.peek()) {
   250  		l.next()
   251  	}
   252  	l.ignore()
   253  	if l.peek() == '`' {
   254  		return lexRawString
   255  	}
   256  	return lexString
   257  }
   258  
   259  func lexString(l *lexer) stateFn {
   260  	for {
   261  		switch r := l.next(); {
   262  		case isEndOfLine(r) || r == eof:
   263  			l.backup()
   264  			l.emit(itemString)
   265  			return lexSpace
   266  		}
   267  	}
   268  }
   269  
   270  func lexRawString(l *lexer) stateFn {
   271  	l.next()
   272  Loop:
   273  	for {
   274  		switch l.next() {
   275  		case eof:
   276  			return l.errorf("unterminated raw string")
   277  		case '`':
   278  			break Loop
   279  		}
   280  	}
   281  	l.emit(itemRawString)
   282  	return lexSpace
   283  }
   284  
   285  // isSpace reports whether r is a space character.
   286  func isSpace(r rune) bool {
   287  	return r == ' ' || r == '\t'
   288  }
   289  
   290  // isEndOfLine reports whether r is an end-of-line character.
   291  func isEndOfLine(r rune) bool {
   292  	return r == '\r' || r == '\n'
   293  }
   294  
   295  func isVarchar(r rune) bool {
   296  	return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r) || r == '-' || r == '.' || r == '$' || r == '/'
   297  }