github.com/peggyl/go@v0.0.0-20151008231540-ae315999c2d5/src/cmd/asm/internal/lex/tokenizer.go (about)

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package lex
     6  
     7  import (
     8  	"io"
     9  	"os"
    10  	"strings"
    11  	"text/scanner"
    12  	"unicode"
    13  )
    14  
    15  // A Tokenizer is a simple wrapping of text/scanner.Scanner, configured
    16  // for our purposes and made a TokenReader. It forms the lowest level,
    17  // turning text from readers into tokens.
// A Tokenizer is a simple wrapping of text/scanner.Scanner, configured
// for our purposes and made a TokenReader. It forms the lowest level,
// turning text from readers into tokens.
type Tokenizer struct {
	tok      ScanToken        // Most recent token produced by Next.
	s        *scanner.Scanner // Underlying scanner over the input stream.
	line     int              // Current line number; starts at 1, advanced by Next, overridable via SetPos.
	fileName string           // Name reported by File; overridable via SetPos.
	file     *os.File // If non-nil, file descriptor to close.
}
    25  
    26  func NewTokenizer(name string, r io.Reader, file *os.File) *Tokenizer {
    27  	var s scanner.Scanner
    28  	s.Init(r)
    29  	// Newline is like a semicolon; other space characters are fine.
    30  	s.Whitespace = 1<<'\t' | 1<<'\r' | 1<<' '
    31  	// Don't skip comments: we need to count newlines.
    32  	s.Mode = scanner.ScanChars |
    33  		scanner.ScanFloats |
    34  		scanner.ScanIdents |
    35  		scanner.ScanInts |
    36  		scanner.ScanStrings |
    37  		scanner.ScanComments
    38  	s.Position.Filename = name
    39  	s.IsIdentRune = isIdentRune
    40  	if file != nil {
    41  		linkCtxt.LineHist.Push(histLine, name)
    42  	}
    43  	return &Tokenizer{
    44  		s:        &s,
    45  		line:     1,
    46  		fileName: name,
    47  		file:     file,
    48  	}
    49  }
    50  
    51  // We want center dot (·) and division slash (∕) to work as identifier characters.
    52  func isIdentRune(ch rune, i int) bool {
    53  	if unicode.IsLetter(ch) {
    54  		return true
    55  	}
    56  	switch ch {
    57  	case '_': // Underscore; traditional.
    58  		return true
    59  	case '\u00B7': // Represents the period in runtime.exit. U+00B7 '·' middle dot
    60  		return true
    61  	case '\u2215': // Represents the slash in runtime/debug.setGCPercent. U+2215 '∕' division slash
    62  		return true
    63  	}
    64  	// Digits are OK only after the first character.
    65  	return i > 0 && unicode.IsDigit(ch)
    66  }
    67  
    68  func (t *Tokenizer) Text() string {
    69  	switch t.tok {
    70  	case LSH:
    71  		return "<<"
    72  	case RSH:
    73  		return ">>"
    74  	case ARR:
    75  		return "->"
    76  	case ROT:
    77  		return "@>"
    78  	}
    79  	return t.s.TokenText()
    80  }
    81  
// File returns the name of the file being tokenized.
func (t *Tokenizer) File() string {
	return t.fileName
}
    85  
// Line returns the current line number, as tracked by Next (the scanner's
// own position is not used, so SetPos overrides take effect).
func (t *Tokenizer) Line() int {
	return t.line
}
    89  
// Col returns the current column number, taken from the underlying scanner.
func (t *Tokenizer) Col() int {
	return t.s.Pos().Column
}
    93  
// SetPos overrides the reported line number and file name
// (e.g. to honor a line directive — callers outside this view decide when).
func (t *Tokenizer) SetPos(line int, file string) {
	t.line = line
	t.fileName = file
}
    98  
// Next scans and returns the next token, discarding comments but counting
// their newlines, and fusing the two-rune operators <<, >>, -> and @> into
// single LSH, RSH, ARR and ROT tokens.
func (t *Tokenizer) Next() ScanToken {
	s := t.s
	for {
		t.tok = ScanToken(s.Scan())
		if t.tok != scanner.Comment {
			break
		}
		// Comments can span lines; keep the line counters in sync.
		length := strings.Count(s.TokenText(), "\n")
		t.line += length
		// NOTE(review): histLine is bumped unconditionally here, but the
		// '\n' case below guards it with t.file != nil — confirm whether
		// comment newlines in non-file input should update the history.
		histLine += length
		// TODO: If we ever have //go: comments in assembly, will need to keep them here.
		// For now, just discard all comments.
	}
	switch t.tok {
	case '\n':
		if t.file != nil {
			histLine++
		}
		t.line++
	case '-':
		// Peek/Next consumes the second rune only when it completes an operator.
		if s.Peek() == '>' {
			s.Next()
			t.tok = ARR
			return ARR
		}
	case '@':
		if s.Peek() == '>' {
			s.Next()
			t.tok = ROT
			return ROT
		}
	case '<':
		if s.Peek() == '<' {
			s.Next()
			t.tok = LSH
			return LSH
		}
	case '>':
		if s.Peek() == '>' {
			s.Next()
			t.tok = RSH
			return RSH
		}
	}
	return t.tok
}
   145  
// Close closes the underlying file, if the tokenizer owns one, and pops the
// corresponding line-history entry. Safe to call when no file is attached.
func (t *Tokenizer) Close() {
	if t.file != nil {
		// NOTE(review): the Close error is deliberately dropped — the file
		// was only read, so there is nothing actionable; confirm callers agree.
		t.file.Close()
		// It's an open file, so pop the line history.
		linkCtxt.LineHist.Pop(histLine)
	}
}
   152  }