github.com/zxy12/go_duplicate_1_12@v0.0.0-20200217043740-b1636fc0368b/src/cmd/asm/internal/lex/tokenizer.go

github.com/zxy12/go_duplicate_1_12@v0.0.0-20200217043740-b1636fc0368b/src/cmd/asm/internal/lex/tokenizer.go (about)

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package lex
     6  
     7  import (
     8  	"io"
     9  	"os"
    10  	"strings"
    11  	"text/scanner"
    12  	"unicode"
    13  
    14  	"cmd/asm/internal/flags"
    15  	"cmd/internal/objabi"
    16  	"cmd/internal/src"
    17  	"fmt"
    18  )
    19  
    20  // A Tokenizer is a simple wrapping of text/scanner.Scanner, configured
    21  // for our purposes and made a TokenReader. It forms the lowest level,
    22  // turning text from readers into tokens.
    23  type Tokenizer struct {
    24  	tok  ScanToken
    25  	s    *scanner.Scanner
    26  	base *src.PosBase
    27  	line int
    28  	file *os.File // If non-nil, file descriptor to close.
    29  }
    30  
    31  func NewTokenizer(name string, r io.Reader, file *os.File) *Tokenizer {
    32  	var s scanner.Scanner
    33  	s.Init(r)
    34  	// Newline is like a semicolon; other space characters are fine.
    35  	s.Whitespace = 1<<'\t' | 1<<'\r' | 1<<' '
    36  	// Don't skip comments: we need to count newlines.
    37  	s.Mode = scanner.ScanChars |
    38  		scanner.ScanFloats |
    39  		scanner.ScanIdents |
    40  		scanner.ScanInts |
    41  		scanner.ScanStrings |
    42  		scanner.ScanComments
    43  	s.Position.Filename = name
    44  	s.IsIdentRune = isIdentRune
    45  	return &Tokenizer{
    46  		s:    &s,
    47  		base: src.NewFileBase(name, objabi.AbsFile(objabi.WorkingDir(), name, *flags.TrimPath)),
    48  		line: 1,
    49  		file: file,
    50  	}
    51  }
    52  
    53  // We want center dot (·) and division slash (∕) to work as identifier characters.
    54  func isIdentRune(ch rune, i int) bool {
    55  	if unicode.IsLetter(ch) {
    56  		return true
    57  	}
    58  	switch ch {
    59  	case '_': // Underscore; traditional.
    60  		return true
    61  	case '\u00B7': // Represents the period in runtime.exit. U+00B7 '·' middle dot
    62  		return true
    63  	case '\u2215': // Represents the slash in runtime/debug.setGCPercent. U+2215 '∕' division slash
    64  		return true
    65  	}
    66  	// Digits are OK only after the first character.
    67  	return i > 0 && unicode.IsDigit(ch)
    68  }
    69  
    70  func (t *Tokenizer) Text() string {
    71  	switch t.tok {
    72  	case LSH:
    73  		return "<<"
    74  	case RSH:
    75  		return ">>"
    76  	case ARR:
    77  		return "->"
    78  	case ROT:
    79  		return "@>"
    80  	}
    81  	return t.s.TokenText()
    82  }
    83  
    84  func (t *Tokenizer) File() string {
    85  	return t.base.Filename()
    86  }
    87  
    88  func (t *Tokenizer) Base() *src.PosBase {
    89  	return t.base
    90  }
    91  
    92  func (t *Tokenizer) SetBase(base *src.PosBase) {
    93  	t.base = base
    94  }
    95  
    96  func (t *Tokenizer) Line() int {
    97  	return t.line
    98  }
    99  
   100  func (t *Tokenizer) Col() int {
   101  	return t.s.Pos().Column
   102  }
   103  
   104  func (t *Tokenizer) Next() ScanToken {
   105  	s := t.s
   106  	for {
   107  		t.tok = ScanToken(s.Scan())
   108  		if t.tok != scanner.Comment {
   109  			break
   110  		}
   111  		length := strings.Count(s.TokenText(), "\n")
   112  		t.line += length
   113  		// TODO: If we ever have //go: comments in assembly, will need to keep them here.
   114  		// For now, just discard all comments.
   115  	}
   116  	fmt.Println("text-intokenizer:", s.TokenText(), ",pos:", s.Pos(), ",peek:", s.Peek())
   117  	switch t.tok {
   118  	case '\n':
   119  		t.line++
   120  	case '-':
   121  		if s.Peek() == '>' {
   122  			s.Next()
   123  			t.tok = ARR
   124  			return ARR
   125  		}
   126  	case '@':
   127  		if s.Peek() == '>' {
   128  			s.Next()
   129  			t.tok = ROT
   130  			return ROT
   131  		}
   132  	case '<':
   133  		if s.Peek() == '<' {
   134  			s.Next()
   135  			t.tok = LSH
   136  			return LSH
   137  		}
   138  	case '>':
   139  		if s.Peek() == '>' {
   140  			s.Next()
   141  			t.tok = RSH
   142  			return RSH
   143  		}
   144  	}
   145  	return t.tok
   146  }
   147  
   148  func (t *Tokenizer) Close() {
   149  	if t.file != nil {
   150  		t.file.Close()
   151  	}
   152  }