github.com/core-coin/go-core/v2@v2.1.9/core/asm/lexer.go

// Copyright 2017 by the Authors
// This file is part of the go-core library.
//
// The go-core library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-core library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-core library. If not, see <http://www.gnu.org/licenses/>.

package asm

import (
	"fmt"
	"os"
	"strings"
	"unicode"
	"unicode/utf8"
)

// stateFn is used through the lifetime of the
// lexer to parse the different values at the
// current state.
type stateFn func(*lexer) stateFn

// token is emitted when the lexer has discovered
// a new parsable token. Tokens are delivered over
// the tokens channel of the lexer.
type token struct {
	typ    tokenType
	lineno int
	text   string
}

// tokenType identifies the different kinds of tokens the lexer
// is able to parse and return.
type tokenType int

const (
	eof              tokenType = iota // end of file
	lineStart                         // emitted when a line starts
	lineEnd                           // emitted when a line ends
	invalidStatement                  // any invalid statement
	element                           // any element during element parsing
	label                             // label is emitted when a label is found
	labelDef                          // label definition is emitted when a new label is found
	number                            // number is emitted when a number is found
	stringValue                       // stringValue is emitted when a string has been found

	Numbers            = "1234567890"                                           // characters representing any decimal number
	HexadecimalNumbers = Numbers + "aAbBcCdDeEfF"                               // characters representing any hexadecimal digit
	Alpha              = "abcdefghijklmnopqrstuwvxyzABCDEFGHIJKLMNOPQRSTUWVXYZ" // characters representing any letter
)

// String implements the fmt.Stringer interface.
func (it tokenType) String() string {
	if int(it) >= len(stringtokenTypes) {
		return "invalid"
	}
	return stringtokenTypes[it]
}

var stringtokenTypes = []string{
	eof:              "EOF",
	invalidStatement: "invalid statement",
	element:          "element",
	lineEnd:          "end of line",
	lineStart:        "new line",
	label:            "label",
	labelDef:         "label definition",
	number:           "number",
	stringValue:      "string",
}

// lexer is the basic construct for parsing
// source code and turning it into tokens.
// Tokens are interpreted by the compiler.
type lexer struct {
	input string // input contains the source code of the program

	tokens chan token // tokens is used to deliver tokens to the listener
	state  stateFn    // the current state function

	lineno            int // current line number in the source file
	start, pos, width int // positions for lexing and returning value

	debug bool // flag for triggering debug output
}
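// The sketch below is illustrative only and is not part of the original
// lexer: it shows how a consumer of the token stream (such as the compiler
// that interprets these tokens) might switch on token.typ and combine it
// with the token text. The helper name describeToken and its output strings
// are hypothetical.
func describeToken(t token) string {
	switch t.typ {
	case number:
		return "number " + t.text
	case stringValue:
		return "string " + t.text
	case labelDef:
		return "label definition " + t.text
	default:
		return t.typ.String() + " " + t.text
	}
}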
// Lex lexes the given source and returns a channel on which
// the tokens are delivered.
func Lex(source []byte, debug bool) <-chan token {
	ch := make(chan token)
	l := &lexer{
		input:  string(source),
		tokens: ch,
		state:  lexLine,
		debug:  debug,
	}
	go func() {
		l.emit(lineStart)
		for l.state != nil {
			l.state = l.state(l)
		}
		l.emit(eof)
		close(l.tokens)
	}()

	return ch
}

// next returns the next rune in the program's source.
func (l *lexer) next() (rune rune) {
	if l.pos >= len(l.input) {
		l.width = 0
		return 0
	}
	rune, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
	l.pos += l.width
	return rune
}

// backup steps back over the last parsed rune.
func (l *lexer) backup() {
	l.pos -= l.width
}

// peek returns the next rune but does not advance the seeker.
func (l *lexer) peek() rune {
	r := l.next()
	l.backup()
	return r
}

// ignore discards the input read so far by moving the start
// position up to the current position.
func (l *lexer) ignore() {
	l.start = l.pos
}

// accept consumes the next rune if it is contained in the valid set
// and reports whether it did so.
func (l *lexer) accept(valid string) bool {
	if strings.ContainsRune(valid, l.next()) {
		return true
	}

	l.backup()

	return false
}

// acceptRun will continue to advance the seeker until valid
// can no longer be met.
func (l *lexer) acceptRun(valid string) {
	for strings.ContainsRune(valid, l.next()) {
	}
	l.backup()
}

// acceptRunUntil is the inverse of acceptRun and will continue
// to advance the seeker until the given rune has been found.
func (l *lexer) acceptRunUntil(until rune) bool {
	// Keep advancing until the terminating rune or the end of input is found.
	for i := l.next(); !strings.ContainsRune(string(until), i); i = l.next() {
		if i == 0 {
			return false
		}
	}

	return true
}

// blob returns the text accumulated for the current token
// (the input between the start and current positions).
func (l *lexer) blob() string {
	return l.input[l.start:l.pos]
}

// emit pushes a new token onto the token channel for processing.
func (l *lexer) emit(t tokenType) {
	token := token{t, l.lineno, l.blob()}

	if l.debug {
		fmt.Fprintf(os.Stderr, "%04d: (%-20v) %s\n", token.lineno, token.typ, token.text)
	}

	l.tokens <- token
	l.start = l.pos
}

// lexLine is the state function for lexing lines.
func lexLine(l *lexer) stateFn {
	for {
		switch r := l.next(); {
		case r == '\n':
			l.emit(lineEnd)
			l.ignore()
			l.lineno++

			l.emit(lineStart)
		case r == ';' && l.peek() == ';':
			return lexComment
		case isSpace(r):
			l.ignore()
		case isLetter(r) || r == '_':
			return lexElement
		case isNumber(r):
			return lexNumber
		case r == '@':
			l.ignore()
			return lexLabel
		case r == '"':
			return lexInsideString
		default:
			return nil
		}
	}
}

// lexComment parses the current position until the end
// of the line and discards the text.
func lexComment(l *lexer) stateFn {
	l.acceptRunUntil('\n')
	l.ignore()

	return lexLine
}

// lexLabel parses the current label, emits it and returns
// the lexLine state function to advance the parsing
// process.
func lexLabel(l *lexer) stateFn {
	l.acceptRun(Alpha + "_" + Numbers)

	l.emit(label)

	return lexLine
}

// lexInsideString lexes the inside of a string until
// the state function finds the closing quote.
// It returns the lexLine state function.
func lexInsideString(l *lexer) stateFn {
	if l.acceptRunUntil('"') {
		l.emit(stringValue)
	}

	return lexLine
}

// lexNumber lexes a decimal or hexadecimal number and returns
// the lexLine state function.
func lexNumber(l *lexer) stateFn {
	acceptance := Numbers
	if l.accept("0") || l.accept("xX") {
		acceptance = HexadecimalNumbers
	}
	l.acceptRun(acceptance)

	l.emit(number)

	return lexLine
}

// lexElement lexes an element and emits it either as a label
// definition (when followed by a colon) or as a plain element.
// It returns the lexLine state function.
func lexElement(l *lexer) stateFn {
	l.acceptRun(Alpha + "_" + Numbers)

	if l.peek() == ':' {
		l.emit(labelDef)

		l.accept(":")
		l.ignore()
	} else {
		l.emit(element)
	}
	return lexLine
}

// isLetter reports whether the rune is a letter.
func isLetter(t rune) bool {
	return unicode.IsLetter(t)
}

// isSpace reports whether the rune is a space character.
func isSpace(t rune) bool {
	return unicode.IsSpace(t)
}

// isNumber reports whether the rune is a numeric character.
func isNumber(t rune) bool {
	return unicode.IsNumber(t)
}
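// The sketch below is illustrative only and is not part of the original
// lexer: it drains the channel returned by Lex and prints every token it
// receives. Tracing the state functions above, the input "@start\npush 0x80\n"
// yields, in order: lineStart, label "start", lineEnd, lineStart,
// element "push", number "0x80", lineEnd, lineStart and finally EOF.
// The helper name lexExample and its output format are hypothetical.
func lexExample() {
	for tok := range Lex([]byte("@start\npush 0x80\n"), false) {
		fmt.Printf("%4d: %-20v %q\n", tok.lineno, tok.typ, tok.text)
	}
}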