github.com/graybobo/golang.org-package-offline-cache@v0.0.0-20200626051047-6608995c132f/x/talks/2011/lex/r59-lex.go (about) 1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // +build ignore 6 7 package template 8 9 import ( 10 "fmt" 11 "strings" 12 "unicode" 13 "utf8" 14 ) 15 16 // item represents a token or text string returned from the scanner. 17 type item struct { 18 typ itemType 19 val string 20 } 21 22 func (i item) String() string { 23 switch { 24 case i.typ == itemEOF: 25 return "EOF" 26 case i.typ == itemError: 27 return i.val 28 case i.typ > itemKeyword: 29 return fmt.Sprintf("<%s>", i.val) 30 case len(i.val) > 10: 31 return fmt.Sprintf("%.10q...", i.val) 32 } 33 return fmt.Sprintf("%q", i.val) 34 } 35 36 // itemType identifies the type of lex items. 37 type itemType int 38 39 const ( 40 itemError itemType = iota // error occurred; value is text of error 41 itemBool // boolean constant 42 itemComplex // complex constant (1+2i); imaginary is just a number 43 itemEOF 44 itemField // alphanumeric identifier, starting with '.', possibly chained ('.x.y') 45 itemIdentifier // alphanumeric identifier 46 itemLeftDelim // left action delimiter 47 itemNumber // simple number, including imaginary 48 itemPipe // pipe symbol 49 itemRawString // raw quoted string (includes quotes) 50 itemRightDelim // right action delimiter 51 itemString // quoted string (includes quotes) 52 itemText // plain text 53 // Keywords appear after all the rest. 54 itemKeyword // used only to delimit the keywords 55 itemDot // the cursor, spelled '.'. 56 itemDefine // define keyword 57 itemElse // else keyword 58 itemEnd // end keyword 59 itemIf // if keyword 60 itemRange // range keyword 61 itemTemplate // template keyword 62 itemWith // with keyword 63 ) 64 65 // Make the types prettyprint. 
66 var itemName = map[itemType]string{ 67 itemError: "error", 68 itemBool: "bool", 69 itemComplex: "complex", 70 itemEOF: "EOF", 71 itemField: "field", 72 itemIdentifier: "identifier", 73 itemLeftDelim: "left delim", 74 itemNumber: "number", 75 itemPipe: "pipe", 76 itemRawString: "raw string", 77 itemRightDelim: "right delim", 78 itemString: "string", 79 // keywords 80 itemDot: ".", 81 itemDefine: "define", 82 itemElse: "else", 83 itemIf: "if", 84 itemEnd: "end", 85 itemRange: "range", 86 itemTemplate: "template", 87 itemWith: "with", 88 } 89 90 func (i itemType) String() string { 91 s := itemName[i] 92 if s == "" { 93 return fmt.Sprintf("item%d", int(i)) 94 } 95 return s 96 } 97 98 var key = map[string]itemType{ 99 ".": itemDot, 100 "define": itemDefine, 101 "else": itemElse, 102 "end": itemEnd, 103 "if": itemIf, 104 "range": itemRange, 105 "template": itemTemplate, 106 "with": itemWith, 107 } 108 109 const eof = -1 110 111 // stateFn represents the state of the scanner as a function that returns the next state. 112 type stateFn func(*lexer) stateFn 113 114 // lexer holds the state of the scanner. 115 type lexer struct { 116 name string // the name of the input; used only for error reports. 117 input string // the string being scanned. 118 state stateFn // the next lexing function to enter 119 pos int // current position in the input. 120 start int // start position of this item. 121 width int // width of last rune read from input. 122 items chan item // channel of scanned items. 123 } 124 125 // next returns the next rune in the input. 126 func (l *lexer) next() (rune int) { 127 if l.pos >= len(l.input) { 128 l.width = 0 129 return eof 130 } 131 rune, l.width = utf8.DecodeRuneInString(l.input[l.pos:]) 132 l.pos += l.width 133 return rune 134 } 135 136 // peek returns but does not consume the next rune in the input. 137 func (l *lexer) peek() int { 138 rune := l.next() 139 l.backup() 140 return rune 141 } 142 143 // backup steps back one rune. 
Can only be called once per call of next. 144 func (l *lexer) backup() { 145 l.pos -= l.width 146 } 147 148 // emit passes an item back to the client. 149 func (l *lexer) emit(t itemType) { 150 l.items <- item{t, l.input[l.start:l.pos]} 151 l.start = l.pos 152 } 153 154 // ignore skips over the pending input before this point. 155 func (l *lexer) ignore() { 156 l.start = l.pos 157 } 158 159 // accept consumes the next rune if it's from the valid set. 160 func (l *lexer) accept(valid string) bool { 161 if strings.IndexRune(valid, l.next()) >= 0 { 162 return true 163 } 164 l.backup() 165 return false 166 } 167 168 // acceptRun consumes a run of runes from the valid set. 169 func (l *lexer) acceptRun(valid string) { 170 for strings.IndexRune(valid, l.next()) >= 0 { 171 } 172 l.backup() 173 } 174 175 // lineNumber reports which line we're on. Doing it this way 176 // means we don't have to worry about peek double counting. 177 func (l *lexer) lineNumber() int { 178 return 1 + strings.Count(l.input[:l.pos], "\n") 179 } 180 181 // error returns an error token and terminates the scan by passing 182 // back a nil pointer that will be the next state, terminating l.run. 183 func (l *lexer) errorf(format string, args ...interface{}) stateFn { 184 l.items <- item{itemError, fmt.Sprintf(format, args...)} 185 return nil 186 } 187 188 // nextItem returns the next item from the input. 189 func (l *lexer) nextItem() item { 190 for { 191 select { 192 case item := <-l.items: 193 return item 194 default: 195 l.state = l.state(l) 196 } 197 } 198 panic("not reached") 199 } 200 201 // lex creates a new scanner for the input string. 202 func lex(name, input string) *lexer { 203 l := &lexer{ 204 name: name, 205 input: input, 206 state: lexText, 207 items: make(chan item, 2), // Two items sufficient. 
208 } 209 return l 210 } 211 212 // state functions 213 214 const ( 215 leftDelim = "{{" 216 rightDelim = "}}" 217 leftComment = "{{/*" 218 rightComment = "*/}}" 219 ) 220 221 // lexText scans until an opening action delimiter, "{{". 222 func lexText(l *lexer) stateFn { 223 for { 224 if strings.HasPrefix(l.input[l.pos:], leftDelim) { 225 if l.pos > l.start { 226 l.emit(itemText) 227 } 228 return lexLeftDelim 229 } 230 if l.next() == eof { 231 break 232 } 233 } 234 // Correctly reached EOF. 235 if l.pos > l.start { 236 l.emit(itemText) 237 } 238 l.emit(itemEOF) 239 return nil 240 } 241 242 // lexLeftDelim scans the left delimiter, which is known to be present. 243 func lexLeftDelim(l *lexer) stateFn { 244 if strings.HasPrefix(l.input[l.pos:], leftComment) { 245 return lexComment 246 } 247 l.pos += len(leftDelim) 248 l.emit(itemLeftDelim) 249 return lexInsideAction 250 } 251 252 // lexComment scans a comment. The left comment marker is known to be present. 253 func lexComment(l *lexer) stateFn { 254 i := strings.Index(l.input[l.pos:], rightComment) 255 if i < 0 { 256 return l.errorf("unclosed comment") 257 } 258 l.pos += i + len(rightComment) 259 l.ignore() 260 return lexText 261 } 262 263 // lexRightDelim scans the right delimiter, which is known to be present. 264 func lexRightDelim(l *lexer) stateFn { 265 l.pos += len(rightDelim) 266 l.emit(itemRightDelim) 267 return lexText 268 } 269 270 // lexInsideAction scans the elements inside action delimiters. 271 func lexInsideAction(l *lexer) stateFn { 272 // Either number, quoted string, or identifier. 273 // Spaces separate and are ignored. 274 // Pipe symbols separate and are emitted. 
275 for { 276 if strings.HasPrefix(l.input[l.pos:], rightDelim) { 277 return lexRightDelim 278 } 279 switch r := l.next(); { 280 case r == eof || r == '\n': 281 return l.errorf("unclosed action") 282 case isSpace(r): 283 l.ignore() 284 case r == '|': 285 l.emit(itemPipe) 286 case r == '"': 287 return lexQuote 288 case r == '`': 289 return lexRawQuote 290 case r == '.': 291 // special look-ahead for ".field" so we don't break l.backup(). 292 if l.pos < len(l.input) { 293 r := l.input[l.pos] 294 if r < '0' || '9' < r { 295 return lexIdentifier // itemDot comes from the keyword table. 296 } 297 } 298 fallthrough // '.' can start a number. 299 case r == '+' || r == '-' || ('0' <= r && r <= '9'): 300 l.backup() 301 return lexNumber 302 case isAlphaNumeric(r): 303 l.backup() 304 return lexIdentifier 305 default: 306 return l.errorf("unrecognized character in action: %#U", r) 307 } 308 } 309 return nil 310 } 311 312 // lexIdentifier scans an alphanumeric or field. 313 func lexIdentifier(l *lexer) stateFn { 314 Loop: 315 for { 316 switch r := l.next(); { 317 case isAlphaNumeric(r): 318 // absorb. 319 case r == '.' && l.input[l.start] == '.': 320 // field chaining; absorb into one token. 321 default: 322 l.backup() 323 word := l.input[l.start:l.pos] 324 switch { 325 case key[word] > itemKeyword: 326 l.emit(key[word]) 327 case word[0] == '.': 328 l.emit(itemField) 329 case word == "true", word == "false": 330 l.emit(itemBool) 331 default: 332 l.emit(itemIdentifier) 333 } 334 break Loop 335 } 336 } 337 return lexInsideAction 338 } 339 340 // lexNumber scans a number: decimal, octal, hex, float, or imaginary. This 341 // isn't a perfect number scanner - for instance it accepts "." and "0x0.2" 342 // and "089" - but when it's wrong the input is invalid and the parser (via 343 // strconv) will notice. 
344 func lexNumber(l *lexer) stateFn { 345 if !l.scanNumber() { 346 return l.errorf("bad number syntax: %q", l.input[l.start:l.pos]) 347 } 348 if sign := l.peek(); sign == '+' || sign == '-' { 349 // Complex: 1+2i. No spaces, must end in 'i'. 350 if !l.scanNumber() || l.input[l.pos-1] != 'i' { 351 return l.errorf("bad number syntax: %q", l.input[l.start:l.pos]) 352 } 353 l.emit(itemComplex) 354 } else { 355 l.emit(itemNumber) 356 } 357 return lexInsideAction 358 } 359 360 func (l *lexer) scanNumber() bool { 361 // Optional leading sign. 362 l.accept("+-") 363 // Is it hex? 364 digits := "0123456789" 365 if l.accept("0") && l.accept("xX") { 366 digits = "0123456789abcdefABCDEF" 367 } 368 l.acceptRun(digits) 369 if l.accept(".") { 370 l.acceptRun(digits) 371 } 372 if l.accept("eE") { 373 l.accept("+-") 374 l.acceptRun("0123456789") 375 } 376 // Is it imaginary? 377 l.accept("i") 378 // Next thing mustn't be alphanumeric. 379 if isAlphaNumeric(l.peek()) { 380 l.next() 381 return false 382 } 383 return true 384 } 385 386 // lexQuote scans a quoted string. 387 func lexQuote(l *lexer) stateFn { 388 Loop: 389 for { 390 switch l.next() { 391 case '\\': 392 if r := l.next(); r != eof && r != '\n' { 393 break 394 } 395 fallthrough 396 case eof, '\n': 397 return l.errorf("unterminated quoted string") 398 case '"': 399 break Loop 400 } 401 } 402 l.emit(itemString) 403 return lexInsideAction 404 } 405 406 // lexRawQuote scans a raw quoted string. 407 func lexRawQuote(l *lexer) stateFn { 408 Loop: 409 for { 410 switch l.next() { 411 case eof, '\n': 412 return l.errorf("unterminated raw quoted string") 413 case '`': 414 break Loop 415 } 416 } 417 l.emit(itemRawString) 418 return lexInsideAction 419 } 420 421 // isSpace reports whether r is a space character. 422 func isSpace(r int) bool { 423 switch r { 424 case ' ', '\t', '\n', '\r': 425 return true 426 } 427 return false 428 } 429 430 // isAlphaNumeric reports whether r is an alphabetic, digit, or underscore. 
// The value arrives as an int (lexer.next reports runes and eof that way) and
// is converted to a rune for the unicode predicates; eof (-1) is neither a
// letter nor a digit, so it reports false.
func isAlphaNumeric(r int) bool {
	c := rune(r)
	return c == '_' || unicode.IsLetter(c) || unicode.IsDigit(c)
}