github.com/servernoj/jade@v0.0.0-20231225191405-efec98d19db1/lex.go (about) 1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package jade 6 7 import ( 8 "fmt" 9 "strings" 10 "unicode" 11 "unicode/utf8" 12 ) 13 14 // item represents a token or text string returned from the scanner. 15 type item struct { 16 typ itemType // The type of this item. 17 pos pos // The starting position, in bytes, of this item in the input string. 18 val string // The value of this item. 19 line int // The line number at the start of this item. 20 depth int 21 } 22 23 func (i item) String() string { 24 switch { 25 case i.typ == itemEOF: 26 return "EOF" 27 case i.typ == itemError: 28 return i.val 29 // case i.typ > itemKeyword: 30 // return fmt.Sprintf("<%s>", i.val) 31 case len(i.val) > 10: 32 return fmt.Sprintf("%.10q...", i.val) 33 } 34 return fmt.Sprintf("%q", i.val) 35 } 36 37 const ( 38 eof = -1 39 spaceChars = " \t\r\n" // These are the space characters defined by Go itself. 40 ) 41 42 // stateFn represents the state of the scanner as a function that returns the next state. 43 type stateFn func(*lexer) stateFn 44 45 // lexer holds the state of the scanner. 46 type lexer struct { 47 name string // the name of the input; used only for error reports 48 input string // the string being scanned 49 pos pos // current position in the input 50 start pos // start position of this item 51 width pos // width of last rune read from input 52 items chan item // channel of scanned items 53 line int // 1+number of newlines seen 54 55 depth int // current tag depth 56 interpolation int // interpolation depth 57 longtext bool // long text flag 58 } 59 60 // next returns the next rune in the input. 61 func (l *lexer) next() rune { 62 if int(l.pos) >= len(l.input) { 63 l.width = 0 64 return eof 65 } 66 r, w := utf8.DecodeRuneInString(l.input[l.pos:]) 67 l.width = pos(w) 68 l.pos += l.width 69 if r == '\n' { 70 l.line++ 71 } 72 return r 73 } 74 75 // peek returns but does not consume the next rune in the input. 76 func (l *lexer) peek() rune { 77 r := l.next() 78 l.backup() 79 return r 80 } 81 82 // backup steps back one rune. Can only be called once per call of next. 83 func (l *lexer) backup() { 84 l.pos -= l.width 85 // Correct newline count. 86 if l.width == 1 && l.input[l.pos] == '\n' { 87 l.line-- 88 } 89 } 90 91 // emit passes an item back to the client. 92 func (l *lexer) emit(t itemType) { 93 l.items <- item{t, l.start, l.input[l.start:l.pos], l.line, l.depth} 94 // Some items contain text internally. If so, count their newlines. 95 switch t { 96 // case itemText, itemRawString, itemLeftDelim, itemRightDelim: 97 case itemText: 98 l.line += strings.Count(l.input[l.start:l.pos], "\n") 99 } 100 l.start = l.pos 101 } 102 103 // ignore skips over the pending input before this point. 104 func (l *lexer) ignore() { 105 l.line += strings.Count(l.input[l.start:l.pos], "\n") 106 l.start = l.pos 107 } 108 109 // accept consumes the next rune if it's from the valid set. 110 func (l *lexer) accept(valid string) bool { 111 if strings.ContainsRune(valid, l.next()) { 112 return true 113 } 114 l.backup() 115 return false 116 } 117 118 // acceptRun consumes a run of runes from the valid set. 119 func (l *lexer) acceptRun(valid string) { 120 for strings.ContainsRune(valid, l.next()) { 121 } 122 l.backup() 123 } 124 125 // errorf returns an error token and terminates the scan by passing 126 // back a nil pointer that will be the next state, terminating l.nextItem. 127 func (l *lexer) errorf(format string, args ...interface{}) stateFn { 128 l.items <- item{itemError, l.start, fmt.Sprintf(format, args...), l.line, l.depth} 129 return nil 130 } 131 132 // nextItem returns the next item from the input. 133 // Called by the parser, not in the lexing goroutine. 134 func (l *lexer) nextItem() item { 135 return <-l.items 136 } 137 138 // drain drains the output so the lexing goroutine will exit. 139 // Called by the parser, not in the lexing goroutine. 140 func (l *lexer) drain() { 141 for range l.items { 142 } 143 } 144 145 // lex creates a new scanner for the input string. 146 func lex(name string, input []byte) *lexer { 147 l := &lexer{ 148 name: name, 149 input: string(input), 150 items: make(chan item), 151 line: 1, 152 } 153 go l.run() 154 return l 155 } 156 157 func (l *lexer) run() { 158 for state := lexIndents; state != nil; { 159 state = state(l) 160 } 161 close(l.items) 162 } 163 164 // atTerminator reports whether the input is at valid termination character to 165 // appear after an identifier. Breaks .X.Y into two pieces. Also catches cases 166 // like "$x+2" not being acceptable without a space, in case we decide one 167 // day to implement arithmetic. 168 func (l *lexer) atTerminator() bool { 169 r := l.peek() 170 if isSpace(r) || isEndOfLine(r) { 171 return true 172 } 173 switch r { 174 case eof, '.', ',', '|', ':', ')', '(': 175 return true 176 } 177 178 return false 179 } 180 181 func (l *lexer) scanNumber() bool { 182 // Optional leading sign. 183 l.accept("+-") 184 // Is it hex? 185 digits := "0123456789" 186 if l.accept("0") && l.accept("xX") { 187 digits = "0123456789abcdefABCDEF" 188 } 189 l.acceptRun(digits) 190 if l.accept(".") { 191 l.acceptRun(digits) 192 } 193 if l.accept("eE") { 194 l.accept("+-") 195 l.acceptRun("0123456789") 196 } 197 // Is it imaginary? 198 l.accept("i") 199 // Next thing mustn't be alphanumeric. 200 if isAlphaNumeric(l.peek()) { 201 l.next() 202 return false 203 } 204 return true 205 } 206 207 // isSpace reports whether r is a space character. 208 func isSpace(r rune) bool { 209 return r == ' ' || r == '\t' 210 } 211 212 // isEndOfLine reports whether r is an end-of-line character. 213 func isEndOfLine(r rune) bool { 214 return r == '\r' || r == '\n' 215 } 216 217 // isAlphaNumeric reports whether r is an alphabetic, digit, or underscore. 218 func isAlphaNumeric(r rune) bool { 219 return r == '_' || r == '-' || unicode.IsLetter(r) || unicode.IsDigit(r) 220 }