github.com/aigarnetwork/aigar@v0.0.0-20191115204914-d59a6eb70f8e/core/asm/lexer.go

// Copyright 2018 The go-ethereum Authors
// Copyright 2019 The go-aigar Authors
// This file is part of the go-aigar library.
//
// The go-aigar library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-aigar library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-aigar library. If not, see <http://www.gnu.org/licenses/>.

package asm

import (
	"fmt"
	"os"
	"strings"
	"unicode"
	"unicode/utf8"
)

// stateFn is used throughout the lifetime of the
// lexer to parse the different values at the
// current state.
type stateFn func(*lexer) stateFn

// token is emitted when the lexer has discovered
// a new parsable token. Tokens are delivered over
// the tokens channel of the lexer.
type token struct {
	typ    tokenType
	lineno int
	text   string
}

// tokenType are the different types the lexer
// is able to parse and return.
type tokenType int

const (
	eof              tokenType = iota // end of file
	lineStart                         // emitted when a line starts
	lineEnd                           // emitted when a line ends
	invalidStatement                  // any invalid statement
	element                           // any element during element parsing
	label                             // label is emitted when a label is found
	labelDef                          // label definition is emitted when a new label is found
	number                            // number is emitted when a number is found
	stringValue                       // stringValue is emitted when a string has been found

	Numbers            = "1234567890"                                           // characters representing any decimal number
	HexadecimalNumbers = Numbers + "aAbBcCdDeEfF"                               // characters representing any hexadecimal digit
	Alpha              = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" // characters representing any letter
)

// String implements fmt.Stringer.
func (it tokenType) String() string {
	if int(it) >= len(stringtokenTypes) {
		return "invalid"
	}
	return stringtokenTypes[it]
}

var stringtokenTypes = []string{
	eof:              "EOF",
	invalidStatement: "invalid statement",
	element:          "element",
	lineEnd:          "end of line",
	lineStart:        "new line",
	label:            "label",
	labelDef:         "label definition",
	number:           "number",
	stringValue:      "string",
}

// lexer is the basic construct for parsing
// source code and turning it into tokens.
// Tokens are interpreted by the compiler.
type lexer struct {
	input string // input contains the source code of the program

	tokens chan token // tokens is used to deliver tokens to the listener
	state  stateFn    // the current state function

	lineno            int // current line number in the source file
	start, pos, width int // positions for lexing and returning value

	debug bool // flag for triggering debug output
}

// Lex lexes the given source. It returns a
// channel on which the tokens are delivered.
func Lex(source []byte, debug bool) <-chan token {
	ch := make(chan token)
	l := &lexer{
		input:  string(source),
		tokens: ch,
		state:  lexLine,
		debug:  debug,
	}
	go func() {
		l.emit(lineStart)
		for l.state != nil {
			l.state = l.state(l)
		}
		l.emit(eof)
		close(l.tokens)
	}()

	return ch
}

// next returns the next rune in the program's source.
func (l *lexer) next() (r rune) {
	if l.pos >= len(l.input) {
		l.width = 0
		return 0
	}
	r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
	l.pos += l.width
	return r
}

// backup steps back over the last parsed rune (which may be multi-byte).
func (l *lexer) backup() {
	l.pos -= l.width
}

// peek returns the next rune but does not advance the seeker.
func (l *lexer) peek() rune {
	r := l.next()
	l.backup()
	return r
}

// ignore discards the input scanned since the last emit.
func (l *lexer) ignore() {
	l.start = l.pos
}

// accept consumes the next rune if it is contained in the valid set.
func (l *lexer) accept(valid string) bool {
	if strings.ContainsRune(valid, l.next()) {
		return true
	}

	l.backup()

	return false
}

// acceptRun will continue to advance the seeker until valid
// can no longer be met.
func (l *lexer) acceptRun(valid string) {
	for strings.ContainsRune(valid, l.next()) {
	}
	l.backup()
}

// acceptRunUntil is the inverse of acceptRun and will continue
// to advance the seeker until the given rune has been found.
func (l *lexer) acceptRunUntil(until rune) bool {
	// Continues running until the rune is found or the input is exhausted.
	for i := l.next(); !strings.ContainsRune(string(until), i); i = l.next() {
		if i == 0 {
			return false
		}
	}

	return true
}

// blob returns the value accumulated since the last emit or ignore.
func (l *lexer) blob() string {
	return l.input[l.start:l.pos]
}

// emit pushes a new token onto the token channel for processing.
func (l *lexer) emit(t tokenType) {
	token := token{t, l.lineno, l.blob()}

	if l.debug {
		fmt.Fprintf(os.Stderr, "%04d: (%-20v) %s\n", token.lineno, token.typ, token.text)
	}

	l.tokens <- token
	l.start = l.pos
}

// lexLine is the state function for lexing lines.
func lexLine(l *lexer) stateFn {
	for {
		switch r := l.next(); {
		case r == '\n':
			l.emit(lineEnd)
			l.ignore()
			l.lineno++

			l.emit(lineStart)
		case r == ';' && l.peek() == ';':
			return lexComment
		case isSpace(r):
			l.ignore()
		case isLetter(r) || r == '_':
			return lexElement
		case isNumber(r):
			return lexNumber
		case r == '@':
			l.ignore()
			return lexLabel
		case r == '"':
			return lexInsideString
		default:
			return nil
		}
	}
}

// lexComment parses the current position until the end
// of the line and discards the text.
func lexComment(l *lexer) stateFn {
	l.acceptRunUntil('\n')
	l.ignore()

	return lexLine
}

// lexLabel parses the current label, emits it and returns
// the lexLine state function to advance the parsing
// process.
func lexLabel(l *lexer) stateFn {
	l.acceptRun(Alpha + "_")

	l.emit(label)

	return lexLine
}

// lexInsideString lexes the inside of a string until
// the state function finds the closing quote.
// It returns the lexLine state function.
func lexInsideString(l *lexer) stateFn {
	if l.acceptRunUntil('"') {
		l.emit(stringValue)
	}

	return lexLine
}

// lexNumber lexes a decimal or hexadecimal number and
// emits it as a number token.
func lexNumber(l *lexer) stateFn {
	acceptance := Numbers
	if l.accept("0") || l.accept("xX") {
		acceptance = HexadecimalNumbers
	}
	l.acceptRun(acceptance)

	l.emit(number)

	return lexLine
}

// lexElement lexes an element such as an opcode or identifier.
// If the element is immediately followed by a colon it is emitted
// as a label definition instead.
func lexElement(l *lexer) stateFn {
	l.acceptRun(Alpha + "_" + Numbers)

	if l.peek() == ':' {
		l.emit(labelDef)

		l.accept(":")
		l.ignore()
	} else {
		l.emit(element)
	}
	return lexLine
}

// isLetter reports whether t is a letter.
func isLetter(t rune) bool {
	return unicode.IsLetter(t)
}

// isSpace reports whether t is a whitespace character.
func isSpace(t rune) bool {
	return unicode.IsSpace(t)
}

// isNumber reports whether t is a numeric character.
func isNumber(t rune) bool {
	return unicode.IsNumber(t)
}
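// Usage sketch (illustrative, not part of the original file): a hypothetical
// caller package that imports this package and drains the channel returned
// by Lex. The main package, the assembly snippet and the printing format are
// assumptions made for the example only.
//
//	package main
//
//	import (
//		"fmt"
//
//		"github.com/aigarnetwork/aigar/core/asm"
//	)
//
//	func main() {
//		src := []byte("@start\npush 0x1\njump @start\n")
//		// Lex returns a receive-only channel that is closed after the
//		// eof token, so ranging over it terminates on its own.
//		for tok := range asm.Lex(src, false) {
//			fmt.Printf("%v\n", tok)
//		}
//	}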