github.com/gagliardetto/golang-go@v0.0.0-20201020153340-53909ea70814/cmd/asm/internal/lex/tokenizer.go

github.com/gagliardetto/golang-go@v0.0.0-20201020153340-53909ea70814/cmd/asm/internal/lex/tokenizer.go (about)

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package lex
     6  
     7  import (
     8  	"io"
     9  	"os"
    10  	"strings"
    11  	"text/scanner"
    12  	"unicode"
    13  
    14  	"github.com/gagliardetto/golang-go/cmd/asm/internal/flags"
    15  	"github.com/gagliardetto/golang-go/cmd/internal/objabi"
    16  	"github.com/gagliardetto/golang-go/cmd/internal/src"
    17  )
    18  
// A Tokenizer is a simple wrapping of text/scanner.Scanner, configured
// for our purposes and made a TokenReader. It forms the lowest level,
// turning text from readers into tokens.
type Tokenizer struct {
	// tok is the token most recently produced by Next.
	tok  ScanToken
	// s is the underlying scanner that the tokens are read from.
	s    *scanner.Scanner
	// base is the position base reported by Base and File; SetBase replaces it.
	base *src.PosBase
	// line is the current line number. NewTokenizer starts it at 1 and Next
	// advances it for every newline seen, including those inside comments.
	line int
	file *os.File // If non-nil, file descriptor to close.
}
    29  
    30  func NewTokenizer(name string, r io.Reader, file *os.File) *Tokenizer {
    31  	var s scanner.Scanner
    32  	s.Init(r)
    33  	// Newline is like a semicolon; other space characters are fine.
    34  	s.Whitespace = 1<<'\t' | 1<<'\r' | 1<<' '
    35  	// Don't skip comments: we need to count newlines.
    36  	s.Mode = scanner.ScanChars |
    37  		scanner.ScanFloats |
    38  		scanner.ScanIdents |
    39  		scanner.ScanInts |
    40  		scanner.ScanStrings |
    41  		scanner.ScanComments
    42  	s.Position.Filename = name
    43  	s.IsIdentRune = isIdentRune
    44  	return &Tokenizer{
    45  		s:    &s,
    46  		base: src.NewFileBase(name, objabi.AbsFile(objabi.WorkingDir(), name, *flags.TrimPath)),
    47  		line: 1,
    48  		file: file,
    49  	}
    50  }
    51  
    52  // We want center dot (·) and division slash (∕) to work as identifier characters.
    53  func isIdentRune(ch rune, i int) bool {
    54  	if unicode.IsLetter(ch) {
    55  		return true
    56  	}
    57  	switch ch {
    58  	case '_': // Underscore; traditional.
    59  		return true
    60  	case '\u00B7': // Represents the period in runtime.exit. U+00B7 '·' middle dot
    61  		return true
    62  	case '\u2215': // Represents the slash in runtime/debug.setGCPercent. U+2215 '∕' division slash
    63  		return true
    64  	}
    65  	// Digits are OK only after the first character.
    66  	return i > 0 && unicode.IsDigit(ch)
    67  }
    68  
    69  func (t *Tokenizer) Text() string {
    70  	switch t.tok {
    71  	case LSH:
    72  		return "<<"
    73  	case RSH:
    74  		return ">>"
    75  	case ARR:
    76  		return "->"
    77  	case ROT:
    78  		return "@>"
    79  	}
    80  	return t.s.TokenText()
    81  }
    82  
    83  func (t *Tokenizer) File() string {
    84  	return t.base.Filename()
    85  }
    86  
    87  func (t *Tokenizer) Base() *src.PosBase {
    88  	return t.base
    89  }
    90  
    91  func (t *Tokenizer) SetBase(base *src.PosBase) {
    92  	t.base = base
    93  }
    94  
    95  func (t *Tokenizer) Line() int {
    96  	return t.line
    97  }
    98  
    99  func (t *Tokenizer) Col() int {
   100  	return t.s.Pos().Column
   101  }
   102  
   103  func (t *Tokenizer) Next() ScanToken {
   104  	s := t.s
   105  	for {
   106  		t.tok = ScanToken(s.Scan())
   107  		if t.tok != scanner.Comment {
   108  			break
   109  		}
   110  		length := strings.Count(s.TokenText(), "\n")
   111  		t.line += length
   112  		// TODO: If we ever have //go: comments in assembly, will need to keep them here.
   113  		// For now, just discard all comments.
   114  	}
   115  	switch t.tok {
   116  	case '\n':
   117  		t.line++
   118  	case '-':
   119  		if s.Peek() == '>' {
   120  			s.Next()
   121  			t.tok = ARR
   122  			return ARR
   123  		}
   124  	case '@':
   125  		if s.Peek() == '>' {
   126  			s.Next()
   127  			t.tok = ROT
   128  			return ROT
   129  		}
   130  	case '<':
   131  		if s.Peek() == '<' {
   132  			s.Next()
   133  			t.tok = LSH
   134  			return LSH
   135  		}
   136  	case '>':
   137  		if s.Peek() == '>' {
   138  			s.Next()
   139  			t.tok = RSH
   140  			return RSH
   141  		}
   142  	}
   143  	return t.tok
   144  }
   145  
   146  func (t *Tokenizer) Close() {
   147  	if t.file != nil {
   148  		t.file.Close()
   149  	}
   150  }