github.1485827954.workers.dev/ethereum/go-ethereum@v1.14.3/core/asm/lexer.go (about) 1 // Copyright 2017 The go-ethereum Authors 2 // This file is part of the go-ethereum library. 3 // 4 // The go-ethereum library is free software: you can redistribute it and/or modify 5 // it under the terms of the GNU Lesser General Public License as published by 6 // the Free Software Foundation, either version 3 of the License, or 7 // (at your option) any later version. 8 // 9 // The go-ethereum library is distributed in the hope that it will be useful, 10 // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 // GNU Lesser General Public License for more details. 13 // 14 // You should have received a copy of the GNU Lesser General Public License 15 // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. 16 17 package asm 18 19 import ( 20 "fmt" 21 "os" 22 "strings" 23 "unicode" 24 "unicode/utf8" 25 ) 26 27 // stateFn is used through the lifetime of the 28 // lexer to parse the different values at the 29 // current state. 30 type stateFn func(*lexer) stateFn 31 32 // token is emitted when the lexer has discovered 33 // a new parsable token. These are delivered over 34 // the tokens channels of the lexer 35 type token struct { 36 typ tokenType 37 lineno int 38 text string 39 } 40 41 // tokenType are the different types the lexer 42 // is able to parse and return. 43 type tokenType int 44 45 //go:generate go run golang.org/x/tools/cmd/stringer -type tokenType 46 47 const ( 48 eof tokenType = iota // end of file 49 lineStart // emitted when a line starts 50 lineEnd // emitted when a line ends 51 invalidStatement // any invalid statement 52 element // any element during element parsing 53 label // label is emitted when a label is found 54 labelDef // label definition is emitted when a new label is found 55 number // number is emitted when a number is found 56 stringValue // stringValue is emitted when a string has been found 57 ) 58 59 const ( 60 decimalNumbers = "1234567890" // characters representing any decimal number 61 hexNumbers = decimalNumbers + "aAbBcCdDeEfF" // characters representing any hexadecimal 62 alpha = "abcdefghijklmnopqrstuwvxyzABCDEFGHIJKLMNOPQRSTUWVXYZ" // characters representing alphanumeric 63 ) 64 65 // lexer is the basic construct for parsing 66 // source code and turning them in to tokens. 67 // Tokens are interpreted by the compiler. 68 type lexer struct { 69 input string // input contains the source code of the program 70 71 tokens chan token // tokens is used to deliver tokens to the listener 72 state stateFn // the current state function 73 74 lineno int // current line number in the source file 75 start, pos, width int // positions for lexing and returning value 76 77 debug bool // flag for triggering debug output 78 } 79 80 // Lex lexes the program by name with the given source. It returns a 81 // channel on which the tokens are delivered. 82 func Lex(source []byte, debug bool) <-chan token { 83 ch := make(chan token) 84 l := &lexer{ 85 input: string(source), 86 tokens: ch, 87 state: lexLine, 88 debug: debug, 89 } 90 go func() { 91 l.emit(lineStart) 92 for l.state != nil { 93 l.state = l.state(l) 94 } 95 l.emit(eof) 96 close(l.tokens) 97 }() 98 99 return ch 100 } 101 102 // next returns the next rune in the program's source. 103 func (l *lexer) next() (rune rune) { 104 if l.pos >= len(l.input) { 105 l.width = 0 106 return 0 107 } 108 rune, l.width = utf8.DecodeRuneInString(l.input[l.pos:]) 109 l.pos += l.width 110 return rune 111 } 112 113 // backup backsup the last parsed element (multi-character) 114 func (l *lexer) backup() { 115 l.pos -= l.width 116 } 117 118 // peek returns the next rune but does not advance the seeker 119 func (l *lexer) peek() rune { 120 r := l.next() 121 l.backup() 122 return r 123 } 124 125 // ignore advances the seeker and ignores the value 126 func (l *lexer) ignore() { 127 l.start = l.pos 128 } 129 130 // accept checks whether the given input matches the next rune 131 func (l *lexer) accept(valid string) bool { 132 if strings.ContainsRune(valid, l.next()) { 133 return true 134 } 135 136 l.backup() 137 138 return false 139 } 140 141 // acceptRun will continue to advance the seeker until valid 142 // can no longer be met. 143 func (l *lexer) acceptRun(valid string) { 144 for strings.ContainsRune(valid, l.next()) { 145 } 146 l.backup() 147 } 148 149 // acceptRunUntil is the inverse of acceptRun and will continue 150 // to advance the seeker until the rune has been found. 151 func (l *lexer) acceptRunUntil(until rune) bool { 152 // Continues running until a rune is found 153 for i := l.next(); !strings.ContainsRune(string(until), i); i = l.next() { 154 if i == 0 { 155 return false 156 } 157 } 158 159 return true 160 } 161 162 // blob returns the current value 163 func (l *lexer) blob() string { 164 return l.input[l.start:l.pos] 165 } 166 167 // Emits a new token on to token channel for processing 168 func (l *lexer) emit(t tokenType) { 169 token := token{t, l.lineno, l.blob()} 170 171 if l.debug { 172 fmt.Fprintf(os.Stderr, "%04d: (%-20v) %s\n", token.lineno, token.typ, token.text) 173 } 174 175 l.tokens <- token 176 l.start = l.pos 177 } 178 179 // lexLine is state function for lexing lines 180 func lexLine(l *lexer) stateFn { 181 for { 182 switch r := l.next(); { 183 case r == '\n': 184 l.emit(lineEnd) 185 l.ignore() 186 l.lineno++ 187 l.emit(lineStart) 188 case r == ';' && l.peek() == ';': 189 return lexComment 190 case isSpace(r): 191 l.ignore() 192 case isLetter(r) || r == '_': 193 return lexElement 194 case isNumber(r): 195 return lexNumber 196 case r == '@': 197 l.ignore() 198 return lexLabel 199 case r == '"': 200 return lexInsideString 201 default: 202 return nil 203 } 204 } 205 } 206 207 // lexComment parses the current position until the end 208 // of the line and discards the text. 209 func lexComment(l *lexer) stateFn { 210 l.acceptRunUntil('\n') 211 l.backup() 212 l.ignore() 213 214 return lexLine 215 } 216 217 // lexLabel parses the current label, emits and returns 218 // the lex text state function to advance the parsing 219 // process. 220 func lexLabel(l *lexer) stateFn { 221 l.acceptRun(alpha + "_" + decimalNumbers) 222 223 l.emit(label) 224 225 return lexLine 226 } 227 228 // lexInsideString lexes the inside of a string until 229 // the state function finds the closing quote. 230 // It returns the lex text state function. 231 func lexInsideString(l *lexer) stateFn { 232 if l.acceptRunUntil('"') { 233 l.emit(stringValue) 234 } 235 236 return lexLine 237 } 238 239 func lexNumber(l *lexer) stateFn { 240 acceptance := decimalNumbers 241 if l.accept("xX") { 242 acceptance = hexNumbers 243 } 244 l.acceptRun(acceptance) 245 246 l.emit(number) 247 248 return lexLine 249 } 250 251 func lexElement(l *lexer) stateFn { 252 l.acceptRun(alpha + "_" + decimalNumbers) 253 254 if l.peek() == ':' { 255 l.emit(labelDef) 256 257 l.accept(":") 258 l.ignore() 259 } else { 260 l.emit(element) 261 } 262 return lexLine 263 } 264 265 func isLetter(t rune) bool { 266 return unicode.IsLetter(t) 267 } 268 269 func isSpace(t rune) bool { 270 return unicode.IsSpace(t) 271 } 272 273 func isNumber(t rune) bool { 274 return unicode.IsNumber(t) 275 }