github.com/zxy12/go_duplicate_112_new@v0.0.0-20200807091221-747231827200/src/cmd/asm/internal/lex/tokenizer.go

// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package lex

import (
	"io"
	"os"
	"strings"
	"text/scanner"
	"unicode"

	"cmd/asm/internal/flags"
	"cmd/internal/objabi"
	"cmd/internal/src"
)

// A Tokenizer is a simple wrapping of text/scanner.Scanner, configured
// for our purposes and made a TokenReader. It forms the lowest level,
// turning text from readers into tokens.
type Tokenizer struct {
	tok  ScanToken
	s    *scanner.Scanner
	base *src.PosBase
	line int
	file *os.File // If non-nil, file descriptor to close.
}

// NewTokenizer returns a Tokenizer that reads tokens from r, reporting
// positions against name. If file is non-nil, Close closes it.
func NewTokenizer(name string, r io.Reader, file *os.File) *Tokenizer {
	var s scanner.Scanner
	s.Init(r)
	// Newline is like a semicolon; other space characters are fine.
	s.Whitespace = 1<<'\t' | 1<<'\r' | 1<<' '
	// Don't skip comments: we need to count newlines.
	s.Mode = scanner.ScanChars |
		scanner.ScanFloats |
		scanner.ScanIdents |
		scanner.ScanInts |
		scanner.ScanStrings |
		scanner.ScanComments
	s.Position.Filename = name
	s.IsIdentRune = isIdentRune
	return &Tokenizer{
		s:    &s,
		base: src.NewFileBase(name, objabi.AbsFile(objabi.WorkingDir(), name, *flags.TrimPath)),
		line: 1,
		file: file,
	}
}

// We want center dot (·) and division slash (∕) to work as identifier characters.
func isIdentRune(ch rune, i int) bool {
	if unicode.IsLetter(ch) {
		return true
	}
	switch ch {
	case '_': // Underscore; traditional.
		return true
	case '\u00B7': // Represents the period in runtime.exit. U+00B7 '·' middle dot
		return true
	case '\u2215': // Represents the slash in runtime/debug.setGCPercent. U+2215 '∕' division slash
		return true
	}
	// Digits are OK only after the first character.
	return i > 0 && unicode.IsDigit(ch)
}

// Text returns the original text of the most recently scanned token,
// spelling out the two-character operator tokens explicitly.
func (t *Tokenizer) Text() string {
	switch t.tok {
	case LSH:
		return "<<"
	case RSH:
		return ">>"
	case ARR:
		return "->"
	case ROT:
		return "@>"
	}
	return t.s.TokenText()
}

func (t *Tokenizer) File() string {
	return t.base.Filename()
}

func (t *Tokenizer) Base() *src.PosBase {
	return t.base
}

func (t *Tokenizer) SetBase(base *src.PosBase) {
	t.base = base
}

func (t *Tokenizer) Line() int {
	return t.line
}

func (t *Tokenizer) Col() int {
	return t.s.Pos().Column
}

// Next scans and returns the next token. It discards comments while
// counting the newlines they contain so line numbers stay accurate,
// and folds the two-character operators <<, >>, -> and @> into the
// single tokens LSH, RSH, ARR and ROT.
func (t *Tokenizer) Next() ScanToken {
	s := t.s
	for {
		t.tok = ScanToken(s.Scan())
		if t.tok != scanner.Comment {
			break
		}
		length := strings.Count(s.TokenText(), "\n")
		t.line += length
		// TODO: If we ever have //go: comments in assembly, will need to keep them here.
		// For now, just discard all comments.
	}
	switch t.tok {
	case '\n':
		t.line++
	case '-':
		if s.Peek() == '>' {
			s.Next()
			t.tok = ARR
			return ARR
		}
	case '@':
		if s.Peek() == '>' {
			s.Next()
			t.tok = ROT
			return ROT
		}
	case '<':
		if s.Peek() == '<' {
			s.Next()
			t.tok = LSH
			return LSH
		}
	case '>':
		if s.Peek() == '>' {
			s.Next()
			t.tok = RSH
			return RSH
		}
	}
	return t.tok
}

// Close closes the underlying file, if any.
func (t *Tokenizer) Close() {
	if t.file != nil {
		t.file.Close()
	}
}
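
// Usage sketch (not part of the original file): a minimal, hypothetical
// driver showing how a Tokenizer is driven. The input string and output
// format are made up for illustration; the loop assumes input ends when
// Next returns scanner.EOF, since Next converts the underlying scanner's
// result directly to a ScanToken.
//
//	t := NewTokenizer("input.s", strings.NewReader("MOVQ $1, AX\n"), nil)
//	defer t.Close()
//	for tok := t.Next(); tok != scanner.EOF; tok = t.Next() {
//		fmt.Printf("%s:%d: %s\n", t.File(), t.Line(), t.Text())
//	}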