github.com/arieschain/arieschain@v0.0.0-20191023063405-37c074544356/core/asm/lexer.go (about)

     1  package asm
     2  
     3  import (
     4  	"fmt"
     5  	"os"
     6  	"strings"
     7  	"unicode"
     8  	"unicode/utf8"
     9  )
    10  
    11  // stateFn is used through the lifetime of the
    12  // lexer to parse the different values at the
    13  // current state.
    14  type stateFn func(*lexer) stateFn
    15  
    16  // token is emitted when the lexer has discovered
    17  // a new parsable token. These are delivered over
    18  // the tokens channels of the lexer
    19  type token struct {
    20  	typ    tokenType
    21  	lineno int
    22  	text   string
    23  }
    24  
    // tokenType enumerates the kinds of token the lexer
    // is able to recognise and emit.
    type tokenType int

    const (
    	eof              tokenType = iota // end of file
    	lineStart                         // emitted when a line starts
    	lineEnd                           // emitted when a line ends
    	invalidStatement                  // any invalid statement
    	element                           // any element during element parsing
    	label                             // label is emitted when a label is found
    	labelDef                          // label definition is emitted when a new label is found
    	number                            // number is emitted when a number is found
    	stringValue                       // stringValue is emitted when a string has been found

    	Numbers            = "1234567890"                                           // characters allowed in a decimal number
    	HexadecimalNumbers = Numbers + "aAbBcCdDeEfF"                               // characters allowed in a hexadecimal number
    	Alpha              = "abcdefghijklmnopqrstuwvxyzABCDEFGHIJKLMNOPQRSTUWVXYZ" // ASCII letters, lower and upper case
    )

    // String implements fmt.Stringer. It returns the human readable
    // name registered in stringtokenTypes, or "invalid" for any value
    // that has no entry there.
    func (it tokenType) String() string {
    	// Valid indexes are 0..len-1, so both negative values and
    	// it == len(stringtokenTypes) must be rejected before indexing;
    	// otherwise the lookup below panics.
    	if it < 0 || int(it) >= len(stringtokenTypes) {
    		return "invalid"
    	}
    	return stringtokenTypes[it]
    }

    // stringtokenTypes maps every tokenType to the name returned by
    // tokenType.String. It is indexed by the tokenType value.
    var stringtokenTypes = []string{
    	eof:              "EOF",
    	invalidStatement: "invalid statement",
    	element:          "element",
    	lineEnd:          "end of line",
    	lineStart:        "new line",
    	label:            "label",
    	labelDef:         "label definition",
    	number:           "number",
    	stringValue:      "string",
    }
    64  
    65  // lexer is the basic construct for parsing
    66  // source code and turning them in to tokens.
    67  // Tokens are interpreted by the compiler.
    68  type lexer struct {
    69  	input string // input contains the source code of the program
    70  
    71  	tokens chan token // tokens is used to deliver tokens to the listener
    72  	state  stateFn    // the current state function
    73  
    74  	lineno            int // current line number in the source file
    75  	start, pos, width int // positions for lexing and returning value
    76  
    77  	debug bool // flag for triggering debug output
    78  }
    79  
    80  // lex lexes the program by name with the given source. It returns a
    81  // channel on which the tokens are delivered.
    82  func Lex(name string, source []byte, debug bool) <-chan token {
    83  	ch := make(chan token)
    84  	l := &lexer{
    85  		input:  string(source),
    86  		tokens: ch,
    87  		state:  lexLine,
    88  		debug:  debug,
    89  	}
    90  	go func() {
    91  		l.emit(lineStart)
    92  		for l.state != nil {
    93  			l.state = l.state(l)
    94  		}
    95  		l.emit(eof)
    96  		close(l.tokens)
    97  	}()
    98  
    99  	return ch
   100  }
   101  
   102  // next returns the next rune in the program's source.
   103  func (l *lexer) next() (rune rune) {
   104  	if l.pos >= len(l.input) {
   105  		l.width = 0
   106  		return 0
   107  	}
   108  	rune, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
   109  	l.pos += l.width
   110  	return rune
   111  }
   112  
   113  // backup backsup the last parsed element (multi-character)
   114  func (l *lexer) backup() {
   115  	l.pos -= l.width
   116  }
   117  
   118  // peek returns the next rune but does not advance the seeker
   119  func (l *lexer) peek() rune {
   120  	r := l.next()
   121  	l.backup()
   122  	return r
   123  }
   124  
   125  // ignore advances the seeker and ignores the value
   126  func (l *lexer) ignore() {
   127  	l.start = l.pos
   128  }
   129  
   130  // Accepts checks whether the given input matches the next rune
   131  func (l *lexer) accept(valid string) bool {
   132  	if strings.ContainsRune(valid, l.next()) {
   133  		return true
   134  	}
   135  
   136  	l.backup()
   137  
   138  	return false
   139  }
   140  
   141  // acceptRun will continue to advance the seeker until valid
   142  // can no longer be met.
   143  func (l *lexer) acceptRun(valid string) {
   144  	for strings.ContainsRune(valid, l.next()) {
   145  	}
   146  	l.backup()
   147  }
   148  
   149  // acceptRunUntil is the inverse of acceptRun and will continue
   150  // to advance the seeker until the rune has been found.
   151  func (l *lexer) acceptRunUntil(until rune) bool {
   152  	// Continues running until a rune is found
   153  	for i := l.next(); !strings.ContainsRune(string(until), i); i = l.next() {
   154  		if i == 0 {
   155  			return false
   156  		}
   157  	}
   158  
   159  	return true
   160  }
   161  
   162  // blob returns the current value
   163  func (l *lexer) blob() string {
   164  	return l.input[l.start:l.pos]
   165  }
   166  
   167  // Emits a new token on to token channel for processing
   168  func (l *lexer) emit(t tokenType) {
   169  	token := token{t, l.lineno, l.blob()}
   170  
   171  	if l.debug {
   172  		fmt.Fprintf(os.Stderr, "%04d: (%-20v) %s\n", token.lineno, token.typ, token.text)
   173  	}
   174  
   175  	l.tokens <- token
   176  	l.start = l.pos
   177  }
   178  
   179  // lexLine is state function for lexing lines
   180  func lexLine(l *lexer) stateFn {
   181  	for {
   182  		switch r := l.next(); {
   183  		case r == '\n':
   184  			l.emit(lineEnd)
   185  			l.ignore()
   186  			l.lineno++
   187  
   188  			l.emit(lineStart)
   189  		case r == ';' && l.peek() == ';':
   190  			return lexComment
   191  		case isSpace(r):
   192  			l.ignore()
   193  		case isLetter(r) || r == '_':
   194  			return lexElement
   195  		case isNumber(r):
   196  			return lexNumber
   197  		case r == '@':
   198  			l.ignore()
   199  			return lexLabel
   200  		case r == '"':
   201  			return lexInsideString
   202  		default:
   203  			return nil
   204  		}
   205  	}
   206  }
   207  
   208  // lexComment parses the current position until the end
   209  // of the line and discards the text.
   210  func lexComment(l *lexer) stateFn {
   211  	l.acceptRunUntil('\n')
   212  	l.ignore()
   213  
   214  	return lexLine
   215  }
   216  
   217  // lexLabel parses the current label, emits and returns
   218  // the lex text state function to advance the parsing
   219  // process.
   220  func lexLabel(l *lexer) stateFn {
   221  	l.acceptRun(Alpha + "_")
   222  
   223  	l.emit(label)
   224  
   225  	return lexLine
   226  }
   227  
   228  // lexInsideString lexes the inside of a string until
   229  // until the state function finds the closing quote.
   230  // It returns the lex text state function.
   231  func lexInsideString(l *lexer) stateFn {
   232  	if l.acceptRunUntil('"') {
   233  		l.emit(stringValue)
   234  	}
   235  
   236  	return lexLine
   237  }
   238  
   239  func lexNumber(l *lexer) stateFn {
   240  	acceptance := Numbers
   241  	if l.accept("0") || l.accept("xX") {
   242  		acceptance = HexadecimalNumbers
   243  	}
   244  	l.acceptRun(acceptance)
   245  
   246  	l.emit(number)
   247  
   248  	return lexLine
   249  }
   250  
   251  func lexElement(l *lexer) stateFn {
   252  	l.acceptRun(Alpha + "_" + Numbers)
   253  
   254  	if l.peek() == ':' {
   255  		l.emit(labelDef)
   256  
   257  		l.accept(":")
   258  		l.ignore()
   259  	} else {
   260  		l.emit(element)
   261  	}
   262  	return lexLine
   263  }
   264  
   // isLetter reports whether t is a letter according to
   // unicode.IsLetter.
   func isLetter(t rune) bool {
   	return unicode.IsLetter(t)
   }
   268  
   // isSpace reports whether t is a white space character
   // according to unicode.IsSpace.
   func isSpace(t rune) bool {
   	return unicode.IsSpace(t)
   }
   272  
   // isNumber reports whether t is a number according to
   // unicode.IsNumber.
   func isNumber(t rune) bool {
   	return unicode.IsNumber(t)
   }