github.1485827954.workers.dev/ethereum/go-ethereum@v1.14.3/core/asm/lexer.go (about)

     1  // Copyright 2017 The go-ethereum Authors
     2  // This file is part of the go-ethereum library.
     3  //
     4  // The go-ethereum library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The go-ethereum library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  package asm
    18  
    19  import (
    20  	"fmt"
    21  	"os"
    22  	"strings"
    23  	"unicode"
    24  	"unicode/utf8"
    25  )
    26  
// stateFn is one state of the lexer's state machine. It is called with
// the lexer and returns the state to run next. Lex keeps invoking the
// returned state until a state returns nil.
type stateFn func(*lexer) stateFn
    31  
// token is emitted when the lexer has discovered a new parsable
// token. Tokens are delivered over the lexer's tokens channel.
type token struct {
	typ    tokenType // kind of token
	lineno int       // value of the lexer's line counter when the token was emitted
	text   string    // slice of the input between the lexer's start and pos at emit time
}
    40  
// tokenType enumerates the kinds of token the lexer is able to
// produce.
type tokenType int

//go:generate go run golang.org/x/tools/cmd/stringer -type tokenType

const (
	eof              tokenType = iota // end of file; sent by Lex once the state machine has stopped
	lineStart                         // emitted when a line starts
	lineEnd                           // emitted when a newline is read
	invalidStatement                  // any invalid statement (not emitted by the state functions in this file)
	element                           // identifier-like word that is not followed by ':'
	label                             // label reference, emitted for the name following '@'
	labelDef                          // label definition, emitted for a word followed by ':'
	number                            // number is emitted when a number is found
	stringValue                       // stringValue is emitted when a closed, double-quoted string is found
)
    58  
// Character sets handed to accept and acceptRun. Within this file they
// are only used for membership tests, so the order of the characters
// carries no meaning.
const (
	decimalNumbers = "1234567890"                                           // characters accepted in a decimal number
	hexNumbers     = decimalNumbers + "aAbBcCdDeEfF"                        // characters accepted in a hexadecimal number
	alpha          = "abcdefghijklmnopqrstuwvxyzABCDEFGHIJKLMNOPQRSTUWVXYZ" // ASCII letters, lower and upper case (alphabetic only, no digits)
)
    64  
// lexer is the basic construct for scanning source code and turning
// it into tokens. The text between start and pos is the pending
// token; emit sends it on the tokens channel.
type lexer struct {
	input string // input contains the source code of the program

	tokens chan token // tokens is used to deliver tokens to the listener
	state  stateFn    // the current state function; nil stops the loop in Lex

	lineno            int // line counter; starts at 0 and is incremented for every newline read by lexLine
	start, pos, width int // byte offsets into input: start of the pending token, read position, and width of the rune last read by next (0 at end of input)

	debug bool // when set, emit also prints every token to stderr
}
    79  
    80  // Lex lexes the program by name with the given source. It returns a
    81  // channel on which the tokens are delivered.
    82  func Lex(source []byte, debug bool) <-chan token {
    83  	ch := make(chan token)
    84  	l := &lexer{
    85  		input:  string(source),
    86  		tokens: ch,
    87  		state:  lexLine,
    88  		debug:  debug,
    89  	}
    90  	go func() {
    91  		l.emit(lineStart)
    92  		for l.state != nil {
    93  			l.state = l.state(l)
    94  		}
    95  		l.emit(eof)
    96  		close(l.tokens)
    97  	}()
    98  
    99  	return ch
   100  }
   101  
   102  // next returns the next rune in the program's source.
   103  func (l *lexer) next() (rune rune) {
   104  	if l.pos >= len(l.input) {
   105  		l.width = 0
   106  		return 0
   107  	}
   108  	rune, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
   109  	l.pos += l.width
   110  	return rune
   111  }
   112  
   113  // backup backsup the last parsed element (multi-character)
   114  func (l *lexer) backup() {
   115  	l.pos -= l.width
   116  }
   117  
   118  // peek returns the next rune but does not advance the seeker
   119  func (l *lexer) peek() rune {
   120  	r := l.next()
   121  	l.backup()
   122  	return r
   123  }
   124  
   125  // ignore advances the seeker and ignores the value
   126  func (l *lexer) ignore() {
   127  	l.start = l.pos
   128  }
   129  
   130  // accept checks whether the given input matches the next rune
   131  func (l *lexer) accept(valid string) bool {
   132  	if strings.ContainsRune(valid, l.next()) {
   133  		return true
   134  	}
   135  
   136  	l.backup()
   137  
   138  	return false
   139  }
   140  
   141  // acceptRun will continue to advance the seeker until valid
   142  // can no longer be met.
   143  func (l *lexer) acceptRun(valid string) {
   144  	for strings.ContainsRune(valid, l.next()) {
   145  	}
   146  	l.backup()
   147  }
   148  
   149  // acceptRunUntil is the inverse of acceptRun and will continue
   150  // to advance the seeker until the rune has been found.
   151  func (l *lexer) acceptRunUntil(until rune) bool {
   152  	// Continues running until a rune is found
   153  	for i := l.next(); !strings.ContainsRune(string(until), i); i = l.next() {
   154  		if i == 0 {
   155  			return false
   156  		}
   157  	}
   158  
   159  	return true
   160  }
   161  
   162  // blob returns the current value
   163  func (l *lexer) blob() string {
   164  	return l.input[l.start:l.pos]
   165  }
   166  
   167  // Emits a new token on to token channel for processing
   168  func (l *lexer) emit(t tokenType) {
   169  	token := token{t, l.lineno, l.blob()}
   170  
   171  	if l.debug {
   172  		fmt.Fprintf(os.Stderr, "%04d: (%-20v) %s\n", token.lineno, token.typ, token.text)
   173  	}
   174  
   175  	l.tokens <- token
   176  	l.start = l.pos
   177  }
   178  
   179  // lexLine is state function for lexing lines
   180  func lexLine(l *lexer) stateFn {
   181  	for {
   182  		switch r := l.next(); {
   183  		case r == '\n':
   184  			l.emit(lineEnd)
   185  			l.ignore()
   186  			l.lineno++
   187  			l.emit(lineStart)
   188  		case r == ';' && l.peek() == ';':
   189  			return lexComment
   190  		case isSpace(r):
   191  			l.ignore()
   192  		case isLetter(r) || r == '_':
   193  			return lexElement
   194  		case isNumber(r):
   195  			return lexNumber
   196  		case r == '@':
   197  			l.ignore()
   198  			return lexLabel
   199  		case r == '"':
   200  			return lexInsideString
   201  		default:
   202  			return nil
   203  		}
   204  	}
   205  }
   206  
   207  // lexComment parses the current position until the end
   208  // of the line and discards the text.
   209  func lexComment(l *lexer) stateFn {
   210  	l.acceptRunUntil('\n')
   211  	l.backup()
   212  	l.ignore()
   213  
   214  	return lexLine
   215  }
   216  
   217  // lexLabel parses the current label, emits and returns
   218  // the lex text state function to advance the parsing
   219  // process.
   220  func lexLabel(l *lexer) stateFn {
   221  	l.acceptRun(alpha + "_" + decimalNumbers)
   222  
   223  	l.emit(label)
   224  
   225  	return lexLine
   226  }
   227  
   228  // lexInsideString lexes the inside of a string until
   229  // the state function finds the closing quote.
   230  // It returns the lex text state function.
   231  func lexInsideString(l *lexer) stateFn {
   232  	if l.acceptRunUntil('"') {
   233  		l.emit(stringValue)
   234  	}
   235  
   236  	return lexLine
   237  }
   238  
   239  func lexNumber(l *lexer) stateFn {
   240  	acceptance := decimalNumbers
   241  	if l.accept("xX") {
   242  		acceptance = hexNumbers
   243  	}
   244  	l.acceptRun(acceptance)
   245  
   246  	l.emit(number)
   247  
   248  	return lexLine
   249  }
   250  
   251  func lexElement(l *lexer) stateFn {
   252  	l.acceptRun(alpha + "_" + decimalNumbers)
   253  
   254  	if l.peek() == ':' {
   255  		l.emit(labelDef)
   256  
   257  		l.accept(":")
   258  		l.ignore()
   259  	} else {
   260  		l.emit(element)
   261  	}
   262  	return lexLine
   263  }
   264  
   265  func isLetter(t rune) bool {
   266  	return unicode.IsLetter(t)
   267  }
   268  
   269  func isSpace(t rune) bool {
   270  	return unicode.IsSpace(t)
   271  }
   272  
   273  func isNumber(t rune) bool {
   274  	return unicode.IsNumber(t)
   275  }