github.com/graybobo/golang.org-package-offline-cache@v0.0.0-20200626051047-6608995c132f/x/talks/2011/lex/lex1.oldgo (about) 1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // reformatted, slightly edited version of lex.go from weekly.11-06-23 6 7 package template 8 9 import ( 10 "fmt" 11 "strings" 12 "unicode" 13 "utf8" 14 ) 15 16 // item represents a token returned from the scanner. 17 type item struct { 18 typ itemType // Type, such as itemNumber. 19 val string // Value, such as "23.2". 20 } 21 22 func (i item) String() string { 23 switch i.typ { 24 case itemEOF: 25 return "EOF" 26 case itemError: 27 return i.val 28 } 29 if len(i.val) > 10 { 30 return fmt.Sprintf("%.10q...", i.val) 31 } 32 return fmt.Sprintf("%q", i.val) 33 } 34 35 // itemType identifies the type of lex items. 36 type itemType int 37 38 const ( 39 itemError itemType = iota // error occurred; 40 // value is text of error 41 itemDot // the cursor, spelled '.' 42 itemEOF 43 itemElse // else keyword 44 itemEnd // end keyword 45 itemField // identifier, starting with '.' 46 itemIdentifier // identifier 47 itemIf // if keyword 48 itemLeftMeta // left meta-string 49 itemNumber // number 50 itemPipe // pipe symbol 51 itemRange // range keyword 52 itemRawString // raw quoted string (includes quotes) 53 itemRightMeta // right meta-string 54 itemString // quoted string (includes quotes) 55 itemText // plain text 56 ) 57 58 // Make the types prettyprint. 59 var itemName = map[itemType]string{ 60 itemError: "error", 61 itemDot: ".", 62 itemEOF: "EOF", 63 itemElse: "else", 64 itemEnd: "end", 65 itemField: "field", 66 itemIdentifier: "identifier", 67 itemIf: "if", 68 itemLeftMeta: "left meta", 69 itemNumber: "number", 70 itemPipe: "pipe", 71 itemRange: "range", 72 itemRawString: "raw string", 73 itemRightMeta: "rightMeta", 74 itemString: "string", 75 itemText: "text", 76 } 77 78 func (i itemType) String() string { 79 s := itemName[i] 80 if s == "" { 81 return fmt.Sprintf("item%d", int(i)) 82 } 83 return s 84 } 85 86 var key = map[string]itemType{ 87 ".": itemDot, 88 "else": itemElse, 89 "end": itemEnd, 90 "if": itemIf, 91 "range": itemRange, 92 } 93 94 const eof = -1 95 96 // stateFn represents the state of the scanner 97 // as a function that returns the next state. 98 type stateFn func(*lexer) stateFn 99 100 // lexer holds the state of the scanner. 101 type lexer struct { 102 name string // used only for error reports. 103 input string // the string being scanned. 104 start int // start position of this item. 105 pos int // current position in the input. 106 width int // width of last rune read from input. 107 items chan item // channel of scanned items. 108 } 109 110 // next returns the next rune in the input. 111 func (l *lexer) next() (rune int) { 112 if l.pos >= len(l.input) { 113 l.width = 0 114 return eof 115 } 116 rune, l.width = 117 utf8.DecodeRuneInString(l.input[l.pos:]) 118 l.pos += l.width 119 return rune 120 } 121 122 // peek returns but does not consume 123 // the next rune in the input. 124 func (l *lexer) peek() int { 125 rune := l.next() 126 l.backup() 127 return rune 128 } 129 130 // backup steps back one rune. 131 // Can be called only once per call of next. 132 func (l *lexer) backup() { 133 l.pos -= l.width 134 } 135 136 // emit passes an item back to the client. 137 func (l *lexer) emit(t itemType) { 138 l.items <- item{t, l.input[l.start:l.pos]} 139 l.start = l.pos 140 } 141 142 // ignore skips over the pending input before this point. 143 func (l *lexer) ignore() { 144 l.start = l.pos 145 } 146 147 // accept consumes the next rune 148 // if it's from the valid set. 149 func (l *lexer) accept(valid string) bool { 150 if strings.IndexRune(valid, l.next()) >= 0 { 151 return true 152 } 153 l.backup() 154 return false 155 } 156 157 // acceptRun consumes a run of runes from the valid set. 158 func (l *lexer) acceptRun(valid string) { 159 for strings.IndexRune(valid, l.next()) >= 0 { 160 } 161 l.backup() 162 } 163 164 // lineNumber reports which line we're on. Doing it this way 165 // means we don't have to worry about peek double counting. 166 func (l *lexer) lineNumber() int { 167 return 1 + strings.Count(l.input[:l.pos], "\n") 168 } 169 170 // error returns an error token and terminates the scan 171 // by passing back a nil pointer that will be the next 172 // state, terminating l.run. 173 func (l *lexer) errorf(format string, args ...interface{}) 174 stateFn { 175 l.items <- item{ 176 itemError, 177 fmt.Sprintf(format, args...), 178 } 179 return nil 180 } 181 182 // run lexes the input by executing state functions until 183 // the state is nil. 184 func (l *lexer) run() { 185 for state := lexText; state != nil; { 186 state = state(l) 187 } 188 close(l.items) // No more tokens will be delivered. 189 } 190 191 // lex launches a new scanner and returns the channel of items. 192 func lex(name, input string) (*lexer, chan item) { 193 l := &lexer{ 194 name: name, 195 input: input, 196 items: make(chan item), 197 } 198 go l.run() // Concurrently run state machine. 199 return l, l.items 200 } 201 202 // state functions 203 204 const leftMeta = "{{" 205 const rightMeta = "}}" 206 207 // lexText scans until a metacharacter 208 func lexText(l *lexer) stateFn { 209 for { 210 if strings.HasPrefix(l.input[l.pos:], leftMeta) { 211 if l.pos > l.start { 212 l.emit(itemText) 213 } 214 return lexLeftMeta // Next state. 215 } 216 if l.next() == eof { break } 217 } 218 // Correctly reached EOF. 219 if l.pos > l.start { 220 l.emit(itemText) 221 } 222 l.emit(itemEOF) // Useful to make EOF a token. 223 return nil // Stop the run loop. 224 } 225 226 // leftMeta scans the left "metacharacter", which is known to be present. 227 func lexLeftMeta(l *lexer) stateFn { 228 l.pos += len(leftMeta) 229 l.emit(itemLeftMeta) 230 return lexInsideAction // Now inside {{ }}. 231 } 232 233 // rightMeta scans the right "metacharacter", which is known to be present. 234 func lexRightMeta(l *lexer) stateFn { 235 l.pos += len(rightMeta) 236 l.emit(itemRightMeta) 237 return lexText 238 } 239 240 // lexInsideAction scans the elements inside "metacharacters". 241 func lexInsideAction(l *lexer) stateFn { 242 // Either number, quoted string, or identifier. 243 // Spaces separate and are ignored. 244 // Pipe symbols separate and are emitted. 245 for { 246 if strings.HasPrefix(l.input[l.pos:], rightMeta) { 247 return lexRightMeta 248 } 249 switch r := l.next(); { 250 case r == eof || r == '\n': 251 return l.errorf("unclosed action") 252 case isSpace(r): 253 l.ignore() 254 case r == '|': 255 l.emit(itemPipe) 256 case r == '"': 257 return lexQuote 258 case r == '`': 259 return lexRawQuote 260 case r == '.': 261 // special look-ahead for ".field" so we don't break l.backup(). 262 if l.pos < len(l.input) { 263 r := l.input[l.pos] 264 if r < '0' || '9' < r { 265 return lexIdentifier // itemDot comes from the keyword table. 266 } 267 } 268 fallthrough // '.' can start a number. 269 case r == '+' || r == '-' || '0' <= r && r <= '9': 270 l.backup() 271 return lexNumber 272 case isAlphaNumeric(r): 273 l.backup() 274 return lexIdentifier 275 default: 276 return l.errorf("unrecognized character in action: %#U", r) 277 } 278 } 279 return nil 280 } 281 282 // lexIdentifier scans an alphanumeric or field. 283 func lexIdentifier(l *lexer) stateFn { 284 Loop: 285 for { 286 switch r := l.next(); { 287 case isAlphaNumeric(r): 288 // absorb 289 default: 290 l.backup() 291 word := l.input[l.start:l.pos] 292 switch { 293 case key[word] != itemError: 294 l.emit(key[word]) 295 case word[0] == '.': 296 l.emit(itemField) 297 default: 298 l.emit(itemIdentifier) 299 } 300 break Loop 301 } 302 } 303 return lexInsideAction 304 } 305 306 // lexNumber scans a number: decimal, octal, hex, float, or imaginary. This 307 // isn't a perfect number scanner - for instance it accepts "." and "0x0.2" 308 // and "089" - but when it's wrong the input is invalid and the parser (via 309 // strconv) will notice. 310 // TODO: without expressions you can do imaginary but not complex. 311 func lexNumber(l *lexer) stateFn { 312 // Optional leading sign. 313 l.accept("+-") 314 // Is it hex? 315 digits := "0123456789" 316 if l.accept("0") && l.accept("xX") { 317 digits = "0123456789abcdefABCDEF" 318 } 319 l.acceptRun(digits) 320 if l.accept(".") { 321 l.acceptRun(digits) 322 } 323 if l.accept("eE") { 324 l.accept("+-") 325 l.acceptRun("0123456789") 326 } 327 // Is it imaginary? 328 l.accept("i") 329 // Next thing mustn't be alphanumeric. 330 if isAlphaNumeric(l.peek()) { 331 l.next() 332 return l.errorf("bad number syntax: %q", 333 l.input[l.start:l.pos]) 334 } 335 l.emit(itemNumber) 336 return lexInsideAction 337 } 338 339 // lexQuote scans a quoted string. 340 func lexQuote(l *lexer) stateFn { 341 Loop: 342 for { 343 switch l.next() { 344 case '\\': 345 if r := l.next(); r != eof && r != '\n' { 346 break 347 } 348 fallthrough 349 case eof, '\n': 350 return l.errorf("unterminated quoted string") 351 case '"': 352 break Loop 353 } 354 } 355 l.emit(itemString) 356 return lexInsideAction 357 } 358 359 // lexRawQuote scans a raw quoted string. 360 func lexRawQuote(l *lexer) stateFn { 361 Loop: 362 for { 363 switch l.next() { 364 case eof, '\n': 365 return l.errorf("unterminated raw quoted string") 366 case '`': 367 break Loop 368 } 369 } 370 l.emit(itemRawString) 371 return lexInsideAction 372 } 373 374 // isSpace reports whether r is a space character. 375 func isSpace(r int) bool { 376 switch r { 377 case ' ', '\t', '\n', '\r': 378 return true 379 } 380 return false 381 } 382 383 // isAlphaNumeric reports whether r is an alphabetic, digit, or underscore. 384 func isAlphaNumeric(r int) bool { 385 return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r) 386 }