github.com/aigarnetwork/aigar@v0.0.0-20191115204914-d59a6eb70f8e/core/asm/lexer.go

//  Copyright 2018 The go-ethereum Authors
//  Copyright 2019 The go-aigar Authors
//  This file is part of the go-aigar library.
//
//  The go-aigar library is free software: you can redistribute it and/or modify
//  it under the terms of the GNU Lesser General Public License as published by
//  the Free Software Foundation, either version 3 of the License, or
//  (at your option) any later version.
//
//  The go-aigar library is distributed in the hope that it will be useful,
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
//  GNU Lesser General Public License for more details.
//
//  You should have received a copy of the GNU Lesser General Public License
//  along with the go-aigar library. If not, see <http://www.gnu.org/licenses/>.

package asm

import (
	"fmt"
	"os"
	"strings"
	"unicode"
	"unicode/utf8"
)

// stateFn is used throughout the lifetime of the
// lexer to parse the different values at the
// current state.
type stateFn func(*lexer) stateFn

// token is emitted when the lexer has discovered
// a new parsable token. These are delivered over
// the tokens channel of the lexer.
type token struct {
	typ    tokenType
	lineno int
	text   string
}

// tokenType are the different types the lexer
// is able to parse and return.
type tokenType int

const (
	eof              tokenType = iota // end of file
	lineStart                         // emitted when a line starts
	lineEnd                           // emitted when a line ends
	invalidStatement                  // any invalid statement
	element                           // any element during element parsing
	label                             // label is emitted when a label is found
	labelDef                          // label definition is emitted when a new label is found
	number                            // number is emitted when a number is found
	stringValue                       // stringValue is emitted when a string has been found

	Numbers            = "1234567890"                                           // characters representing any decimal number
	HexadecimalNumbers = Numbers + "aAbBcCdDeEfF"                               // characters representing any hexadecimal number
	Alpha              = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" // characters representing any alphabetical character
)

// String implements the stringer interface.
func (it tokenType) String() string {
	if int(it) >= len(stringtokenTypes) {
		return "invalid"
	}
	return stringtokenTypes[it]
}

var stringtokenTypes = []string{
	eof:              "EOF",
	invalidStatement: "invalid statement",
	element:          "element",
	lineEnd:          "end of line",
	lineStart:        "new line",
	label:            "label",
	labelDef:         "label definition",
	number:           "number",
	stringValue:      "string",
}

// lexer is the basic construct for parsing
// source code and turning it into tokens.
// Tokens are interpreted by the compiler.
type lexer struct {
	input string // input contains the source code of the program

	tokens chan token // tokens is used to deliver tokens to the listener
	state  stateFn    // the current state function

	lineno            int // current line number in the source file
	start, pos, width int // positions for lexing and returning value

	debug bool // flag for triggering debug output
}

// Lex lexes the given source. It returns a channel on which
// the tokens are delivered.
func Lex(source []byte, debug bool) <-chan token {
	ch := make(chan token)
	l := &lexer{
		input:  string(source),
		tokens: ch,
		state:  lexLine,
		debug:  debug,
	}
	go func() {
		l.emit(lineStart)
		for l.state != nil {
			l.state = l.state(l)
		}
		l.emit(eof)
		close(l.tokens)
	}()

	return ch
}

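// A minimal usage sketch of Lex (the assembly snippet below is hypothetical,
// shown only to illustrate consuming the token channel):
//
//	for tok := range Lex([]byte("push 0x1\n"), false) {
//		fmt.Println(tok.lineno, tok.typ, tok.text)
//	}
//
// For that input one would expect roughly: new line, element "push",
// number "0x1", end of line, new line, EOF.
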
// next returns the next rune in the program's source.
func (l *lexer) next() (r rune) {
	if l.pos >= len(l.input) {
		l.width = 0
		return 0
	}
	r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
	l.pos += l.width
	return r
}

// backup backs up the last parsed element (multi-character)
func (l *lexer) backup() {
	l.pos -= l.width
}

// peek returns the next rune but does not advance the seeker
func (l *lexer) peek() rune {
	r := l.next()
	l.backup()
	return r
}

// ignore discards the input lexed so far by moving the start
// position up to the current position.
func (l *lexer) ignore() {
	l.start = l.pos
}

// accept consumes the next rune if it is contained in the valid set
// and reports whether it did so.
func (l *lexer) accept(valid string) bool {
	if strings.ContainsRune(valid, l.next()) {
		return true
	}

	l.backup()

	return false
}

// acceptRun continues to advance the seeker for as long as the
// next rune is contained in the valid set.
func (l *lexer) acceptRun(valid string) {
	for strings.ContainsRune(valid, l.next()) {
	}
	l.backup()
}

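// accept and acceptRun are typically combined to consume multi-character
// tokens. A minimal sketch (mirroring how lexNumber below handles hex
// literals; the input "0x40" is hypothetical):
//
//	l.accept("xX")                  // consume an optional hex marker
//	l.acceptRun(HexadecimalNumbers) // then consume the digits that follow
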
// acceptRunUntil is the inverse of acceptRun and will continue
// to advance the seeker until the rune has been found.
func (l *lexer) acceptRunUntil(until rune) bool {
	// Continues running until a rune is found
	for i := l.next(); !strings.ContainsRune(string(until), i); i = l.next() {
		if i == 0 {
			return false
		}
	}

	return true
}

// blob returns the text lexed since the last emit or ignore,
// i.e. the input between the start and current positions.
func (l *lexer) blob() string {
	return l.input[l.start:l.pos]
}

// emit sends a new token on the token channel for processing
// and resets the start position.
func (l *lexer) emit(t tokenType) {
	token := token{t, l.lineno, l.blob()}

	if l.debug {
		fmt.Fprintf(os.Stderr, "%04d: (%-20v) %s\n", token.lineno, token.typ, token.text)
	}

	l.tokens <- token
	l.start = l.pos
}

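// With debug enabled, each emitted token is printed to stderr in the format
// above. For a hypothetical element token "push" on the first line, the
// output would look roughly like:
//
//	0000: (element             ) push
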
// lexLine is the state function for lexing lines
func lexLine(l *lexer) stateFn {
	for {
		switch r := l.next(); {
		case r == '\n':
			l.emit(lineEnd)
			l.ignore()
			l.lineno++

			l.emit(lineStart)
		case r == ';' && l.peek() == ';':
			return lexComment
		case isSpace(r):
			l.ignore()
		case isLetter(r) || r == '_':
			return lexElement
		case isNumber(r):
			return lexNumber
		case r == '@':
			l.ignore()
			return lexLabel
		case r == '"':
			return lexInsideString
		default:
			return nil
		}
	}
}

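// To illustrate the dispatch above, a hypothetical source line such as
//
//	jump @start ;; jump back to the start label
//
// is lexed as an element ("jump") followed by a label ("start"), while the
// ";;" comment is consumed by lexComment and discarded.
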
// lexComment parses the current position until the end
// of the line and discards the text.
func lexComment(l *lexer) stateFn {
	l.acceptRunUntil('\n')
	l.ignore()

	return lexLine
}

// lexLabel parses the current label, emits it and returns
// the lexLine state function to advance the parsing
// process.
func lexLabel(l *lexer) stateFn {
	l.acceptRun(Alpha + "_")

	l.emit(label)

	return lexLine
}

// lexInsideString lexes the inside of a string until
// the state function finds the closing quote.
// It returns the lexLine state function.
func lexInsideString(l *lexer) stateFn {
	if l.acceptRunUntil('"') {
		l.emit(stringValue)
	}

	return lexLine
}

// lexNumber lexes a decimal or hexadecimal number, emits it as a
// number token and returns the lexLine state function.
func lexNumber(l *lexer) stateFn {
	acceptance := Numbers
	// A 0 or an x/X following the first digit switches to the hexadecimal character set.
	if l.accept("0") || l.accept("xX") {
		acceptance = HexadecimalNumbers
	}
	l.acceptRun(acceptance)

	l.emit(number)

	return lexLine
}

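// As a hypothetical example, the inputs "64" and "0x40" are both emitted as
// number tokens; for "0x40" the x marker switches the accepted character set
// to HexadecimalNumbers so the trailing hex digits are consumed as well.
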
// lexElement lexes an element such as an opcode or a label definition.
// If the element is immediately followed by a colon it is emitted as a
// label definition and the colon is discarded, otherwise it is emitted
// as a plain element.
func lexElement(l *lexer) stateFn {
	l.acceptRun(Alpha + "_" + Numbers)

	if l.peek() == ':' {
		l.emit(labelDef)

		l.accept(":")
		l.ignore()
	} else {
		l.emit(element)
	}
	return lexLine
}

// isLetter reports whether t is a letter.
func isLetter(t rune) bool {
	return unicode.IsLetter(t)
}

// isSpace reports whether t is a whitespace character.
func isSpace(t rune) bool {
	return unicode.IsSpace(t)
}

// isNumber reports whether t is a numeric character.
func isNumber(t rune) bool {
	return unicode.IsNumber(t)
}