github.com/klaytn/klaytn@v1.12.1/blockchain/asm/lexer.go (about)

     1  // Modifications Copyright 2018 The klaytn Authors
     2  // Copyright 2017 The go-ethereum Authors
     3  // This file is part of the go-ethereum library.
     4  //
     5  // The go-ethereum library is free software: you can redistribute it and/or modify
     6  // it under the terms of the GNU Lesser General Public License as published by
     7  // the Free Software Foundation, either version 3 of the License, or
     8  // (at your option) any later version.
     9  //
    10  // The go-ethereum library is distributed in the hope that it will be useful,
    11  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    12  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    13  // GNU Lesser General Public License for more details.
    14  //
    15  // You should have received a copy of the GNU Lesser General Public License
    16  // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    17  //
    18  // This file is derived from core/asm/lexer.go (2018/06/04).
    19  // Modified and improved for the klaytn development.
    20  
    21  package asm
    22  
    23  import (
    24  	"fmt"
    25  	"os"
    26  	"strings"
    27  	"unicode"
    28  	"unicode/utf8"
    29  )
    30  
    31  // stateFn is used through the lifetime of the
    32  // lexer to parse the different values at the
    33  // current state.
    34  type stateFn func(*lexer) stateFn
    35  
    36  // token is emitted when the lexer has discovered
    37  // a new parsable token. These are delivered over
    38  // the tokens channels of the lexer
    39  type token struct {
    40  	typ    tokenType
    41  	lineno int
    42  	text   string
    43  }
    44  
    45  // tokenType are the different types the lexer
    46  // is able to parse and return.
    47  type tokenType int
    48  
    49  const (
    50  	eof              tokenType = iota // end of file
    51  	lineStart                         // emitted when a line starts
    52  	lineEnd                           // emitted when a line ends
    53  	invalidStatement                  // any invalid statement
    54  	element                           // any element during element parsing
    55  	label                             // label is emitted when a label is found
    56  	labelDef                          // label definition is emitted when a new label is found
    57  	number                            // number is emitted when a number is found
    58  	stringValue                       // stringValue is emitted when a string has been found
    59  
    60  	Numbers            = "1234567890"                                           // characters representing any decimal number
    61  	HexadecimalNumbers = Numbers + "aAbBcCdDeEfF"                               // characters representing any hexadecimal
    62  	Alpha              = "abcdefghijklmnopqrstuwvxyzABCDEFGHIJKLMNOPQRSTUWVXYZ" // characters representing alphanumeric
    63  )
    64  
    65  // String implements stringer
    66  func (it tokenType) String() string {
    67  	if int(it) > len(stringtokenTypes) {
    68  		return "invalid"
    69  	}
    70  	return stringtokenTypes[it]
    71  }
    72  
    73  var stringtokenTypes = []string{
    74  	eof:              "EOF",
    75  	invalidStatement: "invalid statement",
    76  	element:          "element",
    77  	lineEnd:          "end of line",
    78  	lineStart:        "new line",
    79  	label:            "label",
    80  	labelDef:         "label definition",
    81  	number:           "number",
    82  	stringValue:      "string",
    83  }
    84  
    85  // lexer is the basic construct for parsing
    86  // source code and turning them in to tokens.
    87  // Tokens are interpreted by the compiler.
    88  type lexer struct {
    89  	input string // input contains the source code of the program
    90  
    91  	tokens chan token // tokens is used to deliver tokens to the listener
    92  	state  stateFn    // the current state function
    93  
    94  	lineno            int // current line number in the source file
    95  	start, pos, width int // positions for lexing and returning value
    96  
    97  	debug bool // flag for triggering debug output
    98  }
    99  
   100  // lex lexes the program by name with the given source. It returns a
   101  // channel on which the tokens are delivered.
   102  func Lex(source []byte, debug bool) <-chan token {
   103  	ch := make(chan token)
   104  	l := &lexer{
   105  		input:  string(source),
   106  		tokens: ch,
   107  		state:  lexLine,
   108  		debug:  debug,
   109  	}
   110  	go func() {
   111  		l.emit(lineStart)
   112  		for l.state != nil {
   113  			l.state = l.state(l)
   114  		}
   115  		l.emit(eof)
   116  		close(l.tokens)
   117  	}()
   118  
   119  	return ch
   120  }
   121  
   122  // next returns the next rune in the program's source.
   123  func (l *lexer) next() (rune rune) {
   124  	if l.pos >= len(l.input) {
   125  		l.width = 0
   126  		return 0
   127  	}
   128  	rune, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
   129  	l.pos += l.width
   130  	return rune
   131  }
   132  
   133  // backup backsup the last parsed element (multi-character)
   134  func (l *lexer) backup() {
   135  	l.pos -= l.width
   136  }
   137  
   138  // peek returns the next rune but does not advance the seeker
   139  func (l *lexer) peek() rune {
   140  	r := l.next()
   141  	l.backup()
   142  	return r
   143  }
   144  
   145  // ignore advances the seeker and ignores the value
   146  func (l *lexer) ignore() {
   147  	l.start = l.pos
   148  }
   149  
   150  // Accepts checks whether the given input matches the next rune
   151  func (l *lexer) accept(valid string) bool {
   152  	if strings.ContainsRune(valid, l.next()) {
   153  		return true
   154  	}
   155  
   156  	l.backup()
   157  
   158  	return false
   159  }
   160  
   161  // acceptRun will continue to advance the seeker until valid
   162  // can no longer be met.
   163  func (l *lexer) acceptRun(valid string) {
   164  	for strings.ContainsRune(valid, l.next()) {
   165  	}
   166  	l.backup()
   167  }
   168  
   169  // acceptRunUntil is the inverse of acceptRun and will continue
   170  // to advance the seeker until the rune has been found.
   171  func (l *lexer) acceptRunUntil(until rune) bool {
   172  	// Continues running until a rune is found
   173  	for i := l.next(); !strings.ContainsRune(string(until), i); i = l.next() {
   174  		if i == 0 {
   175  			return false
   176  		}
   177  	}
   178  
   179  	return true
   180  }
   181  
   182  // blob returns the current value
   183  func (l *lexer) blob() string {
   184  	return l.input[l.start:l.pos]
   185  }
   186  
   187  // Emits a new token on to token channel for processing
   188  func (l *lexer) emit(t tokenType) {
   189  	token := token{t, l.lineno, l.blob()}
   190  
   191  	if l.debug {
   192  		fmt.Fprintf(os.Stderr, "%04d: (%-20v) %s\n", token.lineno, token.typ, token.text)
   193  	}
   194  
   195  	l.tokens <- token
   196  	l.start = l.pos
   197  }
   198  
   199  // lexLine is state function for lexing lines
   200  func lexLine(l *lexer) stateFn {
   201  	for {
   202  		switch r := l.next(); {
   203  		case r == '\n':
   204  			l.emit(lineEnd)
   205  			l.ignore()
   206  			l.lineno++
   207  
   208  			l.emit(lineStart)
   209  		case r == ';' && l.peek() == ';':
   210  			return lexComment
   211  		case isSpace(r):
   212  			l.ignore()
   213  		case isLetter(r) || r == '_':
   214  			return lexElement
   215  		case isNumber(r):
   216  			return lexNumber
   217  		case r == '@':
   218  			l.ignore()
   219  			return lexLabel
   220  		case r == '"':
   221  			return lexInsideString
   222  		default:
   223  			return nil
   224  		}
   225  	}
   226  }
   227  
   228  // lexComment parses the current position until the end
   229  // of the line and discards the text.
   230  func lexComment(l *lexer) stateFn {
   231  	l.acceptRunUntil('\n')
   232  	l.ignore()
   233  
   234  	return lexLine
   235  }
   236  
   237  // lexLabel parses the current label, emits and returns
   238  // the lex text state function to advance the parsing
   239  // process.
   240  func lexLabel(l *lexer) stateFn {
   241  	l.acceptRun(Alpha + "_")
   242  
   243  	l.emit(label)
   244  
   245  	return lexLine
   246  }
   247  
   248  // lexInsideString lexes the inside of a string until
   249  // the state function finds the closing quote.
   250  // It returns the lex text state function.
   251  func lexInsideString(l *lexer) stateFn {
   252  	if l.acceptRunUntil('"') {
   253  		l.emit(stringValue)
   254  	}
   255  
   256  	return lexLine
   257  }
   258  
   259  func lexNumber(l *lexer) stateFn {
   260  	acceptance := Numbers
   261  	if l.accept("0") || l.accept("xX") {
   262  		acceptance = HexadecimalNumbers
   263  	}
   264  	l.acceptRun(acceptance)
   265  
   266  	l.emit(number)
   267  
   268  	return lexLine
   269  }
   270  
   271  func lexElement(l *lexer) stateFn {
   272  	l.acceptRun(Alpha + "_" + Numbers)
   273  
   274  	if l.peek() == ':' {
   275  		l.emit(labelDef)
   276  
   277  		l.accept(":")
   278  		l.ignore()
   279  	} else {
   280  		l.emit(element)
   281  	}
   282  	return lexLine
   283  }
   284  
   285  func isLetter(t rune) bool {
   286  	return unicode.IsLetter(t)
   287  }
   288  
   289  func isSpace(t rune) bool {
   290  	return unicode.IsSpace(t)
   291  }
   292  
   293  func isNumber(t rune) bool {
   294  	return unicode.IsNumber(t)
   295  }