github.com/gagliardetto/golang-go@v0.0.0-20201020153340-53909ea70814/cmd/asm/internal/lex/tokenizer.go (about) 1 // Copyright 2015 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package lex 6 7 import ( 8 "io" 9 "os" 10 "strings" 11 "text/scanner" 12 "unicode" 13 14 "github.com/gagliardetto/golang-go/cmd/asm/internal/flags" 15 "github.com/gagliardetto/golang-go/cmd/internal/objabi" 16 "github.com/gagliardetto/golang-go/cmd/internal/src" 17 ) 18 19 // A Tokenizer is a simple wrapping of text/scanner.Scanner, configured 20 // for our purposes and made a TokenReader. It forms the lowest level, 21 // turning text from readers into tokens. 22 type Tokenizer struct { 23 tok ScanToken 24 s *scanner.Scanner 25 base *src.PosBase 26 line int 27 file *os.File // If non-nil, file descriptor to close. 28 } 29 30 func NewTokenizer(name string, r io.Reader, file *os.File) *Tokenizer { 31 var s scanner.Scanner 32 s.Init(r) 33 // Newline is like a semicolon; other space characters are fine. 34 s.Whitespace = 1<<'\t' | 1<<'\r' | 1<<' ' 35 // Don't skip comments: we need to count newlines. 36 s.Mode = scanner.ScanChars | 37 scanner.ScanFloats | 38 scanner.ScanIdents | 39 scanner.ScanInts | 40 scanner.ScanStrings | 41 scanner.ScanComments 42 s.Position.Filename = name 43 s.IsIdentRune = isIdentRune 44 return &Tokenizer{ 45 s: &s, 46 base: src.NewFileBase(name, objabi.AbsFile(objabi.WorkingDir(), name, *flags.TrimPath)), 47 line: 1, 48 file: file, 49 } 50 } 51 52 // We want center dot (·) and division slash (∕) to work as identifier characters. 53 func isIdentRune(ch rune, i int) bool { 54 if unicode.IsLetter(ch) { 55 return true 56 } 57 switch ch { 58 case '_': // Underscore; traditional. 59 return true 60 case '\u00B7': // Represents the period in runtime.exit. U+00B7 '·' middle dot 61 return true 62 case '\u2215': // Represents the slash in runtime/debug.setGCPercent. U+2215 '∕' division slash 63 return true 64 } 65 // Digits are OK only after the first character. 66 return i > 0 && unicode.IsDigit(ch) 67 } 68 69 func (t *Tokenizer) Text() string { 70 switch t.tok { 71 case LSH: 72 return "<<" 73 case RSH: 74 return ">>" 75 case ARR: 76 return "->" 77 case ROT: 78 return "@>" 79 } 80 return t.s.TokenText() 81 } 82 83 func (t *Tokenizer) File() string { 84 return t.base.Filename() 85 } 86 87 func (t *Tokenizer) Base() *src.PosBase { 88 return t.base 89 } 90 91 func (t *Tokenizer) SetBase(base *src.PosBase) { 92 t.base = base 93 } 94 95 func (t *Tokenizer) Line() int { 96 return t.line 97 } 98 99 func (t *Tokenizer) Col() int { 100 return t.s.Pos().Column 101 } 102 103 func (t *Tokenizer) Next() ScanToken { 104 s := t.s 105 for { 106 t.tok = ScanToken(s.Scan()) 107 if t.tok != scanner.Comment { 108 break 109 } 110 length := strings.Count(s.TokenText(), "\n") 111 t.line += length 112 // TODO: If we ever have //go: comments in assembly, will need to keep them here. 113 // For now, just discard all comments. 114 } 115 switch t.tok { 116 case '\n': 117 t.line++ 118 case '-': 119 if s.Peek() == '>' { 120 s.Next() 121 t.tok = ARR 122 return ARR 123 } 124 case '@': 125 if s.Peek() == '>' { 126 s.Next() 127 t.tok = ROT 128 return ROT 129 } 130 case '<': 131 if s.Peek() == '<' { 132 s.Next() 133 t.tok = LSH 134 return LSH 135 } 136 case '>': 137 if s.Peek() == '>' { 138 s.Next() 139 t.tok = RSH 140 return RSH 141 } 142 } 143 return t.tok 144 } 145 146 func (t *Tokenizer) Close() { 147 if t.file != nil { 148 t.file.Close() 149 } 150 }