github.com/peggyl/go@v0.0.0-20151008231540-ae315999c2d5/src/cmd/asm/internal/lex/lex.go

// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package lex implements lexical analysis for the assembler.
package lex

import (
	"fmt"
	"log"
	"os"
	"strings"
	"text/scanner"

	"cmd/internal/obj"
)

// A ScanToken represents an input item. It is a simple wrapping of rune, as
// returned by text/scanner.Scanner, plus a couple of extra values.
type ScanToken rune

const (
	// Asm defines some two-character lexemes. We make up
	// a rune/ScanToken value for them - ugly but simple.
	LSH       ScanToken = -1000 - iota // << Left shift.
	RSH                                // >> Logical right shift.
	ARR                                // -> Used on ARM for shift type 3, arithmetic right shift.
	ROT                                // @> Used on ARM for shift type 4, rotate right.
	macroName                          // name of macro that should not be expanded
)
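
// For example, the tokenizer folds the two-character input "<<" into the
// single token LSH, so "R1<<2" lexes as the three tokens Ident("R1"), LSH,
// Int("2") rather than as four single-rune tokens.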

// IsRegisterShift reports whether the token is one of the ARM register shift operators.
func IsRegisterShift(r ScanToken) bool {
	return ROT <= r && r <= LSH // Order looks backwards because these are negative.
}
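
// With the values above, iota makes LSH = -1000, RSH = -1001, ARR = -1002,
// and ROT = -1003, so the test ROT <= r && r <= LSH accepts exactly the four
// shift operators and excludes macroName (-1004).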

func (t ScanToken) String() string {
	switch t {
	case scanner.EOF:
		return "EOF"
	case scanner.Ident:
		return "identifier"
	case scanner.Int:
		return "integer constant"
	case scanner.Float:
		return "float constant"
	case scanner.Char:
		return "rune constant"
	case scanner.String:
		return "string constant"
	case scanner.RawString:
		return "raw string constant"
	case scanner.Comment:
		return "comment"
	default:
		return fmt.Sprintf("%q", rune(t))
	}
}

var (
	// It might be nice if these weren't global.
	linkCtxt *obj.Link     // The link context for all instructions.
	histLine int       = 1 // The cumulative count of lines processed.
)

// HistLine reports the cumulative source line number of the token,
// for use in the Prog structure for the linker. (It's always handling the
// instruction from the current lex line.)
// It returns int32 because that's the type ../asm prefers.
func HistLine() int32 {
	return int32(histLine)
}

// NewLexer returns a lexer for the named file and the given link context.
func NewLexer(name string, ctxt *obj.Link) TokenReader {
	linkCtxt = ctxt
	input := NewInput(name)
	fd, err := os.Open(name)
	if err != nil {
		log.Fatalf("asm: %s\n", err)
	}
	input.Push(NewTokenizer(name, fd, fd))
	return input
}
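
// A minimal usage sketch (hypothetical: assumes a *obj.Link set up by the
// assembler and a source file "prog.s" on disk; note NewLexer exits the
// process via log.Fatalf if the file cannot be opened):
//
//	lexer := NewLexer("prog.s", ctxt)
//	defer lexer.Close()
//	tok := lexer.Next() // first token of prog.s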

// InitHist sets the line count to 1, for reproducible testing.
func InitHist() {
	histLine = 1
}

// The other files in this directory each contain an implementation of TokenReader.

// A TokenReader is like a reader, but returns lex tokens of type ScanToken. It can also
// tell you what the text of the most recently returned token is, and where it was found.
// The underlying scanner elides all spaces except newline, so the input looks like a stream
// of Tokens; original spacing is lost but we don't need it.
type TokenReader interface {
	// Next returns the next token.
	Next() ScanToken
	// The following methods all refer to the most recent token returned by Next.
	// Text returns the original string representation of the token.
	Text() string
	// File reports the source file name of the token.
	File() string
	// Line reports the source line number of the token.
	Line() int
	// Col reports the source column number of the token.
	Col() int
	// SetPos sets the file and line number.
	SetPos(line int, file string)
	// Close does any teardown required.
	Close()
}
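
// A typical consumption loop over a TokenReader (a sketch; tr may be any
// implementation, such as the one returned by NewLexer):
//
//	for tok := tr.Next(); tok != scanner.EOF; tok = tr.Next() {
//		fmt.Printf("%s:%d: %s %s\n", tr.File(), tr.Line(), tok, tr.Text())
//	}
//	tr.Close()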

// A Token is a scan token plus its string value.
// A macro is stored as a sequence of Tokens with spaces stripped.
type Token struct {
	ScanToken
	text string
}

// Make returns a Token with the given rune (ScanToken) and text representation.
func Make(token ScanToken, text string) Token {
	// If the symbol starts with center dot, as in ·x, rewrite it as ""·x
	if token == scanner.Ident && strings.HasPrefix(text, "\u00B7") {
		text = `""` + text
	}
	// Substitute the substitutes for . and /.
	text = strings.Replace(text, "\u00B7", ".", -1)
	text = strings.Replace(text, "\u2215", "/", -1)
	return Token{ScanToken: token, text: text}
}
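
// For example, Make undoes the Unicode stand-ins the assembler accepts for
// '.' and '/' in symbol names (a sketch of the rewrites above):
//
//	Make(scanner.Ident, "·foo")      // text becomes `"".foo`
//	Make(scanner.Ident, "runtime·x") // text becomes "runtime.x"
//	Make(scanner.Ident, "a∕b·c")     // text becomes "a/b.c"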

func (l Token) String() string {
	return l.text
}

// A Macro represents the definition of a #defined macro.
type Macro struct {
	name   string   // The #define name.
	args   []string // Formal arguments.
	tokens []Token  // Body of macro.
}
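
// For instance, after the preprocessor handles
//
//	#define PLUS1(x) x+1
//
// the stored definition is, schematically (a sketch; tokens holds Token
// values, shown here by their text):
//
//	Macro{name: "PLUS1", args: []string{"x"}, tokens: ["x", "+", "1"]}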

// Tokenize turns a string into a list of Tokens; used to parse the -D flag and in tests.
func Tokenize(str string) []Token {
	t := NewTokenizer("command line", strings.NewReader(str), nil)
	var tokens []Token
	for {
		tok := t.Next()
		if tok == scanner.EOF {
			break
		}
		tokens = append(tokens, Make(tok, t.Text()))
	}
	return tokens
}
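
// A usage sketch, mirroring how a -D flag value might be split
// (hypothetical input):
//
//	toks := Tokenize("A=1")
//	// toks[0].String() == "A", toks[1].String() == "=", toks[2].String() == "1"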