bosun.org@v0.0.0-20210513094433-e25bc3e69a1f/cmd/bosun/conf/rule/parse/lex.go (about) 1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package parse 6 7 import ( 8 "fmt" 9 "strings" 10 "unicode" 11 "unicode/utf8" 12 ) 13 14 // item represents a token or text string returned from the scanner. 15 type item struct { 16 typ itemType // The type of this item. 17 pos Pos // The starting position, in bytes, of this item in the input string. 18 val string // The value of this item. 19 } 20 21 func (i item) String() string { 22 switch { 23 case i.typ == itemEOF: 24 return "EOF" 25 case i.typ == itemError: 26 return i.val 27 case len(i.val) > 10: 28 return fmt.Sprintf("%.10q...", i.val) 29 } 30 return fmt.Sprintf("%q", i.val) 31 } 32 33 // itemType identifies the type of lex items. 34 type itemType int 35 36 const ( 37 itemError itemType = iota // error occurred; value is text of error 38 itemEOF 39 itemEqual // '=' 40 itemLeftDelim // '{' 41 itemRawString // raw string (includes quotes) 42 itemIdentifier // identifier for section and value names 43 itemRightDelim // '}' 44 itemString // string (excluding prefix whitespace and EOL or NL at EOL) 45 itemSubsectionIdentifier // identifier for subsection names 46 ) 47 48 const eof = -1 49 50 // stateFn represents the state of the scanner as a function that returns the next state. 51 type stateFn func(*lexer) stateFn 52 53 // lexer holds the state of the scanner. 54 type lexer struct { 55 name string // the name of the input; used only for error reports 56 input string // the string being scanned 57 state stateFn // the next lexing function to enter 58 pos Pos // current position in the input 59 start Pos // start position of this item 60 width Pos // width of last rune read from input 61 lastPos Pos // position of most recent item returned by nextItem 62 items chan item // channel of scanned items 63 } 64 65 // next returns the next rune in the input. 66 func (l *lexer) next() rune { 67 if int(l.pos) >= len(l.input) { 68 l.width = 0 69 return eof 70 } 71 r, w := utf8.DecodeRuneInString(l.input[l.pos:]) 72 l.width = Pos(w) 73 l.pos += l.width 74 return r 75 } 76 77 // peek returns but does not consume the next rune in the input. 78 func (l *lexer) peek() rune { 79 r := l.next() 80 l.backup() 81 return r 82 } 83 84 // backup steps back one rune. Can only be called once per call of next. 85 func (l *lexer) backup() { 86 l.pos -= l.width 87 } 88 89 // emit passes an item back to the client. 90 func (l *lexer) emit(t itemType) { 91 l.items <- item{t, l.start, l.input[l.start:l.pos]} 92 l.start = l.pos 93 } 94 95 // ignore skips over the pending input before this point. 96 func (l *lexer) ignore() { 97 l.start = l.pos 98 } 99 100 // lineNumber reports which line we're on, based on the position of 101 // the previous item returned by nextItem. Doing it this way 102 // means we don't have to worry about peek double counting. 103 func (l *lexer) lineNumber() int { 104 return 1 + strings.Count(l.input[:l.lastPos], "\n") 105 } 106 107 // errorf returns an error token and terminates the scan by passing 108 // back a nil pointer that will be the next state, terminating l.nextItem. 109 func (l *lexer) errorf(format string, args ...interface{}) stateFn { 110 l.items <- item{itemError, l.start, fmt.Sprintf(format, args...)} 111 return nil 112 } 113 114 // nextItem returns the next item from the input. 115 func (l *lexer) nextItem() item { 116 item := <-l.items 117 l.lastPos = item.pos 118 return item 119 } 120 121 // lex creates a new scanner for the input string. 122 func lex(name, input string) *lexer { 123 l := &lexer{ 124 name: name, 125 input: input, 126 items: make(chan item), 127 } 128 go l.run() 129 return l 130 } 131 132 // run runs the state machine for the lexer. 133 func (l *lexer) run() { 134 for l.state = lexSpace; l.state != nil; { 135 l.state = l.state(l) 136 } 137 } 138 139 // state functions 140 141 const ( 142 leftDelim = '{' 143 rightDelim = '}' 144 equal = '=' 145 comment = '#' 146 newLine = "\n" 147 ) 148 149 // lexSpace scans until start of section or value 150 func lexSpace(l *lexer) stateFn { 151 Loop: 152 for { 153 switch r := l.next(); { 154 case r == leftDelim: 155 return lexLeftDelim 156 case r == rightDelim: 157 return lexRightDelim 158 case r == equal: 159 return lexEqual 160 case isVarchar(r): 161 l.backup() 162 return lexValue 163 case isSpace(r) || isEndOfLine(r): 164 l.ignore() 165 case r == eof: 166 l.emit(itemEOF) 167 break Loop 168 case r == comment: 169 return lexComment 170 default: 171 return l.errorf("invalid character: %v", string(r)) 172 } 173 } 174 return nil 175 } 176 177 func lexComment(l *lexer) stateFn { 178 i := strings.Index(l.input[l.pos:], newLine) 179 if i < 0 { 180 l.emit(itemEOF) 181 return nil 182 } 183 l.pos += Pos(i + len(newLine)) 184 l.ignore() 185 return lexSpace 186 } 187 188 func lexLeftDelim(l *lexer) stateFn { 189 l.emit(itemLeftDelim) 190 return lexSpace 191 } 192 193 func lexRightDelim(l *lexer) stateFn { 194 l.emit(itemRightDelim) 195 return lexSpace 196 } 197 198 func lexValue(l *lexer) stateFn { 199 l.ignore() 200 for { 201 switch r := l.next(); { 202 case isVarchar(r): 203 // absorb 204 default: 205 l.backup() 206 l.emit(itemIdentifier) 207 return lexValueNext 208 } 209 } 210 } 211 212 func lexValueNext(l *lexer) stateFn { 213 for { 214 switch r := l.next(); { 215 case isSpace(r) || isEndOfLine(r): 216 l.ignore() 217 case r == equal: 218 return lexEqual 219 case isSubsectionChar(r): 220 l.backup() 221 return lexSubsection 222 default: 223 return l.errorf("invalid character: %v", string(r)) 224 } 225 } 226 } 227 228 func lexSubsection(l *lexer) stateFn { 229 Loop: 230 for { 231 switch r := l.next(); { 232 case isSubsectionChar(r): 233 // absorb 234 default: 235 l.backup() 236 break Loop 237 } 238 } 239 l.emit(itemSubsectionIdentifier) 240 return lexSpace 241 } 242 243 func isSubsectionChar(r rune) bool { 244 return isVarchar(r) || r == '*' || r == ',' || r == '=' || r == '|' 245 } 246 247 func lexEqual(l *lexer) stateFn { 248 l.emit(itemEqual) 249 for isSpace(l.peek()) { 250 l.next() 251 } 252 l.ignore() 253 if l.peek() == '`' { 254 return lexRawString 255 } 256 return lexString 257 } 258 259 func lexString(l *lexer) stateFn { 260 for { 261 switch r := l.next(); { 262 case isEndOfLine(r) || r == eof: 263 l.backup() 264 l.emit(itemString) 265 return lexSpace 266 } 267 } 268 } 269 270 func lexRawString(l *lexer) stateFn { 271 l.next() 272 Loop: 273 for { 274 switch l.next() { 275 case eof: 276 return l.errorf("unterminated raw string") 277 case '`': 278 break Loop 279 } 280 } 281 l.emit(itemRawString) 282 return lexSpace 283 } 284 285 // isSpace reports whether r is a space character. 286 func isSpace(r rune) bool { 287 return r == ' ' || r == '\t' 288 } 289 290 // isEndOfLine reports whether r is an end-of-line character. 291 func isEndOfLine(r rune) bool { 292 return r == '\r' || r == '\n' 293 } 294 295 func isVarchar(r rune) bool { 296 return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r) || r == '-' || r == '.' || r == '$' || r == '/' 297 }