github.com/arieschain/arieschain@v0.0.0-20191023063405-37c074544356/core/asm/lexer.go (about) 1 package asm 2 3 import ( 4 "fmt" 5 "os" 6 "strings" 7 "unicode" 8 "unicode/utf8" 9 ) 10 11 // stateFn is used through the lifetime of the 12 // lexer to parse the different values at the 13 // current state. 14 type stateFn func(*lexer) stateFn 15 16 // token is emitted when the lexer has discovered 17 // a new parsable token. These are delivered over 18 // the tokens channels of the lexer 19 type token struct { 20 typ tokenType 21 lineno int 22 text string 23 } 24 25 // tokenType are the different types the lexer 26 // is able to parse and return. 27 type tokenType int 28 29 const ( 30 eof tokenType = iota // end of file 31 lineStart // emitted when a line starts 32 lineEnd // emitted when a line ends 33 invalidStatement // any invalid statement 34 element // any element during element parsing 35 label // label is emitted when a label is found 36 labelDef // label definition is emitted when a new label is found 37 number // number is emitted when a number is found 38 stringValue // stringValue is emitted when a string has been found 39 40 Numbers = "1234567890" // characters representing any decimal number 41 HexadecimalNumbers = Numbers + "aAbBcCdDeEfF" // characters representing any hexadecimal 42 Alpha = "abcdefghijklmnopqrstuwvxyzABCDEFGHIJKLMNOPQRSTUWVXYZ" // characters representing alphanumeric 43 ) 44 45 // String implements stringer 46 func (it tokenType) String() string { 47 if int(it) > len(stringtokenTypes) { 48 return "invalid" 49 } 50 return stringtokenTypes[it] 51 } 52 53 var stringtokenTypes = []string{ 54 eof: "EOF", 55 invalidStatement: "invalid statement", 56 element: "element", 57 lineEnd: "end of line", 58 lineStart: "new line", 59 label: "label", 60 labelDef: "label definition", 61 number: "number", 62 stringValue: "string", 63 } 64 65 // lexer is the basic construct for parsing 66 // source code and turning them in to tokens. 67 // Tokens are interpreted by the compiler. 68 type lexer struct { 69 input string // input contains the source code of the program 70 71 tokens chan token // tokens is used to deliver tokens to the listener 72 state stateFn // the current state function 73 74 lineno int // current line number in the source file 75 start, pos, width int // positions for lexing and returning value 76 77 debug bool // flag for triggering debug output 78 } 79 80 // lex lexes the program by name with the given source. It returns a 81 // channel on which the tokens are delivered. 82 func Lex(name string, source []byte, debug bool) <-chan token { 83 ch := make(chan token) 84 l := &lexer{ 85 input: string(source), 86 tokens: ch, 87 state: lexLine, 88 debug: debug, 89 } 90 go func() { 91 l.emit(lineStart) 92 for l.state != nil { 93 l.state = l.state(l) 94 } 95 l.emit(eof) 96 close(l.tokens) 97 }() 98 99 return ch 100 } 101 102 // next returns the next rune in the program's source. 103 func (l *lexer) next() (rune rune) { 104 if l.pos >= len(l.input) { 105 l.width = 0 106 return 0 107 } 108 rune, l.width = utf8.DecodeRuneInString(l.input[l.pos:]) 109 l.pos += l.width 110 return rune 111 } 112 113 // backup backsup the last parsed element (multi-character) 114 func (l *lexer) backup() { 115 l.pos -= l.width 116 } 117 118 // peek returns the next rune but does not advance the seeker 119 func (l *lexer) peek() rune { 120 r := l.next() 121 l.backup() 122 return r 123 } 124 125 // ignore advances the seeker and ignores the value 126 func (l *lexer) ignore() { 127 l.start = l.pos 128 } 129 130 // Accepts checks whether the given input matches the next rune 131 func (l *lexer) accept(valid string) bool { 132 if strings.ContainsRune(valid, l.next()) { 133 return true 134 } 135 136 l.backup() 137 138 return false 139 } 140 141 // acceptRun will continue to advance the seeker until valid 142 // can no longer be met. 143 func (l *lexer) acceptRun(valid string) { 144 for strings.ContainsRune(valid, l.next()) { 145 } 146 l.backup() 147 } 148 149 // acceptRunUntil is the inverse of acceptRun and will continue 150 // to advance the seeker until the rune has been found. 151 func (l *lexer) acceptRunUntil(until rune) bool { 152 // Continues running until a rune is found 153 for i := l.next(); !strings.ContainsRune(string(until), i); i = l.next() { 154 if i == 0 { 155 return false 156 } 157 } 158 159 return true 160 } 161 162 // blob returns the current value 163 func (l *lexer) blob() string { 164 return l.input[l.start:l.pos] 165 } 166 167 // Emits a new token on to token channel for processing 168 func (l *lexer) emit(t tokenType) { 169 token := token{t, l.lineno, l.blob()} 170 171 if l.debug { 172 fmt.Fprintf(os.Stderr, "%04d: (%-20v) %s\n", token.lineno, token.typ, token.text) 173 } 174 175 l.tokens <- token 176 l.start = l.pos 177 } 178 179 // lexLine is state function for lexing lines 180 func lexLine(l *lexer) stateFn { 181 for { 182 switch r := l.next(); { 183 case r == '\n': 184 l.emit(lineEnd) 185 l.ignore() 186 l.lineno++ 187 188 l.emit(lineStart) 189 case r == ';' && l.peek() == ';': 190 return lexComment 191 case isSpace(r): 192 l.ignore() 193 case isLetter(r) || r == '_': 194 return lexElement 195 case isNumber(r): 196 return lexNumber 197 case r == '@': 198 l.ignore() 199 return lexLabel 200 case r == '"': 201 return lexInsideString 202 default: 203 return nil 204 } 205 } 206 } 207 208 // lexComment parses the current position until the end 209 // of the line and discards the text. 210 func lexComment(l *lexer) stateFn { 211 l.acceptRunUntil('\n') 212 l.ignore() 213 214 return lexLine 215 } 216 217 // lexLabel parses the current label, emits and returns 218 // the lex text state function to advance the parsing 219 // process. 220 func lexLabel(l *lexer) stateFn { 221 l.acceptRun(Alpha + "_") 222 223 l.emit(label) 224 225 return lexLine 226 } 227 228 // lexInsideString lexes the inside of a string until 229 // until the state function finds the closing quote. 230 // It returns the lex text state function. 231 func lexInsideString(l *lexer) stateFn { 232 if l.acceptRunUntil('"') { 233 l.emit(stringValue) 234 } 235 236 return lexLine 237 } 238 239 func lexNumber(l *lexer) stateFn { 240 acceptance := Numbers 241 if l.accept("0") || l.accept("xX") { 242 acceptance = HexadecimalNumbers 243 } 244 l.acceptRun(acceptance) 245 246 l.emit(number) 247 248 return lexLine 249 } 250 251 func lexElement(l *lexer) stateFn { 252 l.acceptRun(Alpha + "_" + Numbers) 253 254 if l.peek() == ':' { 255 l.emit(labelDef) 256 257 l.accept(":") 258 l.ignore() 259 } else { 260 l.emit(element) 261 } 262 return lexLine 263 } 264 265 func isLetter(t rune) bool { 266 return unicode.IsLetter(t) 267 } 268 269 func isSpace(t rune) bool { 270 return unicode.IsSpace(t) 271 } 272 273 func isNumber(t rune) bool { 274 return unicode.IsNumber(t) 275 }