github.com/jimmyx0x/go-ethereum@v1.10.28/core/asm/lexer.go (about)

     1  // Copyright 2017 The go-ethereum Authors
     2  // This file is part of the go-ethereum library.
     3  //
     4  // The go-ethereum library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The go-ethereum library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  package asm
    18  
    19  import (
    20  	"fmt"
    21  	"os"
    22  	"strings"
    23  	"unicode"
    24  	"unicode/utf8"
    25  )
    26  
    27  // stateFn is used through the lifetime of the
    28  // lexer to parse the different values at the
    29  // current state.
    30  type stateFn func(*lexer) stateFn
    31  
    32  // token is emitted when the lexer has discovered
    33  // a new parsable token. These are delivered over
    34  // the tokens channels of the lexer
    35  type token struct {
    36  	typ    tokenType
    37  	lineno int
    38  	text   string
    39  }
    40  
    41  // tokenType are the different types the lexer
    42  // is able to parse and return.
    43  type tokenType int
    44  
    45  const (
    46  	eof              tokenType = iota // end of file
    47  	lineStart                         // emitted when a line starts
    48  	lineEnd                           // emitted when a line ends
    49  	invalidStatement                  // any invalid statement
    50  	element                           // any element during element parsing
    51  	label                             // label is emitted when a label is found
    52  	labelDef                          // label definition is emitted when a new label is found
    53  	number                            // number is emitted when a number is found
    54  	stringValue                       // stringValue is emitted when a string has been found
    55  
    56  	Numbers            = "1234567890"                                           // characters representing any decimal number
    57  	HexadecimalNumbers = Numbers + "aAbBcCdDeEfF"                               // characters representing any hexadecimal
    58  	Alpha              = "abcdefghijklmnopqrstuwvxyzABCDEFGHIJKLMNOPQRSTUWVXYZ" // characters representing alphanumeric
    59  )
    60  
    61  // String implements stringer
    62  func (it tokenType) String() string {
    63  	if int(it) > len(stringtokenTypes) {
    64  		return "invalid"
    65  	}
    66  	return stringtokenTypes[it]
    67  }
    68  
    69  var stringtokenTypes = []string{
    70  	eof:              "EOF",
    71  	lineStart:        "new line",
    72  	lineEnd:          "end of line",
    73  	invalidStatement: "invalid statement",
    74  	element:          "element",
    75  	label:            "label",
    76  	labelDef:         "label definition",
    77  	number:           "number",
    78  	stringValue:      "string",
    79  }
    80  
    81  // lexer is the basic construct for parsing
    82  // source code and turning them in to tokens.
    83  // Tokens are interpreted by the compiler.
    84  type lexer struct {
    85  	input string // input contains the source code of the program
    86  
    87  	tokens chan token // tokens is used to deliver tokens to the listener
    88  	state  stateFn    // the current state function
    89  
    90  	lineno            int // current line number in the source file
    91  	start, pos, width int // positions for lexing and returning value
    92  
    93  	debug bool // flag for triggering debug output
    94  }
    95  
    96  // Lex lexes the program by name with the given source. It returns a
    97  // channel on which the tokens are delivered.
    98  func Lex(source []byte, debug bool) <-chan token {
    99  	ch := make(chan token)
   100  	l := &lexer{
   101  		input:  string(source),
   102  		tokens: ch,
   103  		state:  lexLine,
   104  		debug:  debug,
   105  	}
   106  	go func() {
   107  		l.emit(lineStart)
   108  		for l.state != nil {
   109  			l.state = l.state(l)
   110  		}
   111  		l.emit(eof)
   112  		close(l.tokens)
   113  	}()
   114  
   115  	return ch
   116  }
   117  
   118  // next returns the next rune in the program's source.
   119  func (l *lexer) next() (rune rune) {
   120  	if l.pos >= len(l.input) {
   121  		l.width = 0
   122  		return 0
   123  	}
   124  	rune, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
   125  	l.pos += l.width
   126  	return rune
   127  }
   128  
   129  // backup backsup the last parsed element (multi-character)
   130  func (l *lexer) backup() {
   131  	l.pos -= l.width
   132  }
   133  
   134  // peek returns the next rune but does not advance the seeker
   135  func (l *lexer) peek() rune {
   136  	r := l.next()
   137  	l.backup()
   138  	return r
   139  }
   140  
   141  // ignore advances the seeker and ignores the value
   142  func (l *lexer) ignore() {
   143  	l.start = l.pos
   144  }
   145  
   146  // Accepts checks whether the given input matches the next rune
   147  func (l *lexer) accept(valid string) bool {
   148  	if strings.ContainsRune(valid, l.next()) {
   149  		return true
   150  	}
   151  
   152  	l.backup()
   153  
   154  	return false
   155  }
   156  
   157  // acceptRun will continue to advance the seeker until valid
   158  // can no longer be met.
   159  func (l *lexer) acceptRun(valid string) {
   160  	for strings.ContainsRune(valid, l.next()) {
   161  	}
   162  	l.backup()
   163  }
   164  
   165  // acceptRunUntil is the inverse of acceptRun and will continue
   166  // to advance the seeker until the rune has been found.
   167  func (l *lexer) acceptRunUntil(until rune) bool {
   168  	// Continues running until a rune is found
   169  	for i := l.next(); !strings.ContainsRune(string(until), i); i = l.next() {
   170  		if i == 0 {
   171  			return false
   172  		}
   173  	}
   174  
   175  	return true
   176  }
   177  
   178  // blob returns the current value
   179  func (l *lexer) blob() string {
   180  	return l.input[l.start:l.pos]
   181  }
   182  
   183  // Emits a new token on to token channel for processing
   184  func (l *lexer) emit(t tokenType) {
   185  	token := token{t, l.lineno, l.blob()}
   186  
   187  	if l.debug {
   188  		fmt.Fprintf(os.Stderr, "%04d: (%-20v) %s\n", token.lineno, token.typ, token.text)
   189  	}
   190  
   191  	l.tokens <- token
   192  	l.start = l.pos
   193  }
   194  
   195  // lexLine is state function for lexing lines
   196  func lexLine(l *lexer) stateFn {
   197  	for {
   198  		switch r := l.next(); {
   199  		case r == '\n':
   200  			l.emit(lineEnd)
   201  			l.ignore()
   202  			l.lineno++
   203  
   204  			l.emit(lineStart)
   205  		case r == ';' && l.peek() == ';':
   206  			return lexComment
   207  		case isSpace(r):
   208  			l.ignore()
   209  		case isLetter(r) || r == '_':
   210  			return lexElement
   211  		case isNumber(r):
   212  			return lexNumber
   213  		case r == '@':
   214  			l.ignore()
   215  			return lexLabel
   216  		case r == '"':
   217  			return lexInsideString
   218  		default:
   219  			return nil
   220  		}
   221  	}
   222  }
   223  
   224  // lexComment parses the current position until the end
   225  // of the line and discards the text.
   226  func lexComment(l *lexer) stateFn {
   227  	l.acceptRunUntil('\n')
   228  	l.ignore()
   229  
   230  	return lexLine
   231  }
   232  
   233  // lexLabel parses the current label, emits and returns
   234  // the lex text state function to advance the parsing
   235  // process.
   236  func lexLabel(l *lexer) stateFn {
   237  	l.acceptRun(Alpha + "_" + Numbers)
   238  
   239  	l.emit(label)
   240  
   241  	return lexLine
   242  }
   243  
   244  // lexInsideString lexes the inside of a string until
   245  // the state function finds the closing quote.
   246  // It returns the lex text state function.
   247  func lexInsideString(l *lexer) stateFn {
   248  	if l.acceptRunUntil('"') {
   249  		l.emit(stringValue)
   250  	}
   251  
   252  	return lexLine
   253  }
   254  
   255  func lexNumber(l *lexer) stateFn {
   256  	acceptance := Numbers
   257  	if l.accept("xX") {
   258  		acceptance = HexadecimalNumbers
   259  	}
   260  	l.acceptRun(acceptance)
   261  
   262  	l.emit(number)
   263  
   264  	return lexLine
   265  }
   266  
   267  func lexElement(l *lexer) stateFn {
   268  	l.acceptRun(Alpha + "_" + Numbers)
   269  
   270  	if l.peek() == ':' {
   271  		l.emit(labelDef)
   272  
   273  		l.accept(":")
   274  		l.ignore()
   275  	} else {
   276  		l.emit(element)
   277  	}
   278  	return lexLine
   279  }
   280  
   281  func isLetter(t rune) bool {
   282  	return unicode.IsLetter(t)
   283  }
   284  
   285  func isSpace(t rune) bool {
   286  	return unicode.IsSpace(t)
   287  }
   288  
   289  func isNumber(t rune) bool {
   290  	return unicode.IsNumber(t)
   291  }