github.com/klaytn/klaytn@v1.12.1/blockchain/asm/lexer.go

// Modifications Copyright 2018 The klaytn Authors
// Copyright 2017 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
//
// This file is derived from core/asm/lexer.go (2018/06/04).
// Modified and improved for the klaytn development.

package asm

import (
	"fmt"
	"os"
	"strings"
	"unicode"
	"unicode/utf8"
)

// stateFn is used throughout the lifetime of the
// lexer to parse the different values at the
// current state.
type stateFn func(*lexer) stateFn

// token is emitted when the lexer has discovered
// a new parsable token. Tokens are delivered over
// the tokens channel of the lexer.
type token struct {
	typ    tokenType
	lineno int
	text   string
}

// tokenType are the different types the lexer
// is able to parse and return.
type tokenType int

const (
	eof              tokenType = iota // end of file
	lineStart                         // emitted when a line starts
	lineEnd                           // emitted when a line ends
	invalidStatement                  // any invalid statement
	element                           // any element during element parsing
	label                             // label is emitted when a label is found
	labelDef                          // label definition is emitted when a new label is found
	number                            // number is emitted when a number is found
	stringValue                       // stringValue is emitted when a string has been found

	Numbers            = "1234567890"                                           // characters representing any decimal number
	HexadecimalNumbers = Numbers + "aAbBcCdDeEfF"                               // characters representing any hexadecimal digit
	Alpha              = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" // characters representing any alphabetic character
)

// String implements fmt.Stringer.
func (it tokenType) String() string {
	if int(it) >= len(stringtokenTypes) {
		return "invalid"
	}
	return stringtokenTypes[it]
}

var stringtokenTypes = []string{
	eof:              "EOF",
	invalidStatement: "invalid statement",
	element:          "element",
	lineEnd:          "end of line",
	lineStart:        "new line",
	label:            "label",
	labelDef:         "label definition",
	number:           "number",
	stringValue:      "string",
}

// lexer is the basic construct for parsing
// source code and turning it into tokens.
// Tokens are interpreted by the compiler.
type lexer struct {
	input string // input contains the source code of the program

	tokens chan token // tokens is used to deliver tokens to the listener
	state  stateFn    // the current state function

	lineno            int // current line number in the source file
	start, pos, width int // positions for lexing and returning value

	debug bool // flag for triggering debug output
}
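// runStates is an illustrative sketch (not part of the original file, and the
// name is hypothetical) of how the stateFn type above drives the lexer as a
// small state machine: each state function consumes input, emits tokens, and
// returns the next state, until a nil state ends the loop. Lex below runs the
// same loop on a dedicated goroutine.
func runStates(l *lexer, start stateFn) {
	for state := start; state != nil; {
		state = state(l)
	}
}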
// Lex lexes the program with the given source. It returns a
// channel on which the tokens are delivered.
func Lex(source []byte, debug bool) <-chan token {
	ch := make(chan token)
	l := &lexer{
		input:  string(source),
		tokens: ch,
		state:  lexLine,
		debug:  debug,
	}
	go func() {
		l.emit(lineStart)
		for l.state != nil {
			l.state = l.state(l)
		}
		l.emit(eof)
		close(l.tokens)
	}()

	return ch
}

// next returns the next rune in the program's source.
func (l *lexer) next() (r rune) {
	if l.pos >= len(l.input) {
		l.width = 0
		return 0
	}
	r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
	l.pos += l.width
	return r
}

// backup backs up to the last parsed element (multi-character).
func (l *lexer) backup() {
	l.pos -= l.width
}

// peek returns the next rune but does not advance the seeker.
func (l *lexer) peek() rune {
	r := l.next()
	l.backup()
	return r
}

// ignore discards the pending input by moving the start
// position up to the current position.
func (l *lexer) ignore() {
	l.start = l.pos
}

// accept consumes the next rune and reports whether it is
// contained in the given set of valid characters.
func (l *lexer) accept(valid string) bool {
	if strings.ContainsRune(valid, l.next()) {
		return true
	}

	l.backup()

	return false
}

// acceptRun will continue to advance the seeker until valid
// can no longer be met.
func (l *lexer) acceptRun(valid string) {
	for strings.ContainsRune(valid, l.next()) {
	}
	l.backup()
}

// acceptRunUntil is the inverse of acceptRun and will continue
// to advance the seeker until the given rune has been found.
func (l *lexer) acceptRunUntil(until rune) bool {
	// Continues running until the rune is found
	for i := l.next(); !strings.ContainsRune(string(until), i); i = l.next() {
		if i == 0 {
			return false
		}
	}

	return true
}

// blob returns the text accumulated since the last emit or ignore.
func (l *lexer) blob() string {
	return l.input[l.start:l.pos]
}

// emit pushes a new token onto the token channel for processing.
func (l *lexer) emit(t tokenType) {
	token := token{t, l.lineno, l.blob()}

	if l.debug {
		fmt.Fprintf(os.Stderr, "%04d: (%-20v) %s\n", token.lineno, token.typ, token.text)
	}

	l.tokens <- token
	l.start = l.pos
}

// lexLine is the state function for lexing lines.
func lexLine(l *lexer) stateFn {
	for {
		switch r := l.next(); {
		case r == '\n':
			l.emit(lineEnd)
			l.ignore()
			l.lineno++

			l.emit(lineStart)
		case r == ';' && l.peek() == ';':
			return lexComment
		case isSpace(r):
			l.ignore()
		case isLetter(r) || r == '_':
			return lexElement
		case isNumber(r):
			return lexNumber
		case r == '@':
			l.ignore()
			return lexLabel
		case r == '"':
			return lexInsideString
		default:
			return nil
		}
	}
}

// lexComment parses the current position until the end
// of the line and discards the text.
func lexComment(l *lexer) stateFn {
	l.acceptRunUntil('\n')
	l.ignore()

	return lexLine
}

// lexLabel parses the current label, emits it and returns
// the line lexing state function to advance the parsing
// process.
func lexLabel(l *lexer) stateFn {
	l.acceptRun(Alpha + "_")

	l.emit(label)

	return lexLine
}
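// scanIdentifier is an illustrative sketch (not part of the original file, and
// the name is hypothetical) of how the cursor primitives above compose: accept
// tests a single rune against a character class, acceptRun greedily consumes a
// run of that class, and blob returns the pending text without emitting it.
// The real state functions (lexLabel above, lexElement below) follow the same
// shape but emit tokens instead of returning the raw text.
func scanIdentifier(l *lexer) string {
	if !l.accept(Alpha + "_") { // identifiers start with a letter or underscore
		return ""
	}
	l.acceptRun(Alpha + Numbers + "_") // the rest may also contain digits
	return l.blob()
}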
// lexInsideString lexes the inside of a string until
// the state function finds the closing quote.
// It returns the line lexing state function.
func lexInsideString(l *lexer) stateFn {
	if l.acceptRunUntil('"') {
		l.emit(stringValue)
	}

	return lexLine
}

// lexNumber lexes a decimal or hexadecimal number and emits it
// as a number token.
func lexNumber(l *lexer) stateFn {
	acceptance := Numbers
	if l.accept("0") || l.accept("xX") {
		acceptance = HexadecimalNumbers
	}
	l.acceptRun(acceptance)

	l.emit(number)

	return lexLine
}

// lexElement lexes an element and emits it as either a labelDef token
// (when followed by a colon) or a plain element token.
func lexElement(l *lexer) stateFn {
	l.acceptRun(Alpha + "_" + Numbers)

	if l.peek() == ':' {
		l.emit(labelDef)

		l.accept(":")
		l.ignore()
	} else {
		l.emit(element)
	}
	return lexLine
}

// isLetter reports whether t is a letter.
func isLetter(t rune) bool {
	return unicode.IsLetter(t)
}

// isSpace reports whether t is a space character.
func isSpace(t rune) bool {
	return unicode.IsSpace(t)
}

// isNumber reports whether t is a numeric character.
func isNumber(t rune) bool {
	return unicode.IsNumber(t)
}
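// dumpTokens is an illustrative usage sketch (not part of the original file;
// the name and the sample source are assumptions). It drives Lex end to end:
// the returned channel is drained until the eof token arrives, after which Lex
// closes the channel and the range loop ends.
func dumpTokens() {
	src := []byte("@start\n\tpush 0x1\n\tjump @start\n")
	for tok := range Lex(src, false) {
		if tok.typ == eof {
			break
		}
		fmt.Printf("line %d: %-18v %q\n", tok.lineno, tok.typ, tok.text)
	}
}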