github.com/go-asm/go@v1.21.1-0.20240213172139-40c5ead50c48/cmd/asm/lex/tokenizer.go

// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package lex

import (
	"go/build/constraint"
	"io"
	"os"
	"strings"
	"text/scanner"
	"unicode"

	"github.com/go-asm/go/cmd/asm/flags"
	"github.com/go-asm/go/cmd/objabi"
	"github.com/go-asm/go/cmd/src"
)

// A Tokenizer is a simple wrapper around text/scanner.Scanner, configured
// for our purposes and made a TokenReader. It forms the lowest level,
// turning text from readers into tokens.
type Tokenizer struct {
	tok  ScanToken
	s    *scanner.Scanner
	base *src.PosBase
	line int
	file *os.File // If non-nil, file descriptor to close.
}

// NewTokenizer returns a Tokenizer that reads from r, reporting token
// positions against the given file name. If file is non-nil, Close will
// close it.
func NewTokenizer(name string, r io.Reader, file *os.File) *Tokenizer {
	var s scanner.Scanner
	s.Init(r)
	// Newline is like a semicolon; other space characters are fine.
	s.Whitespace = 1<<'\t' | 1<<'\r' | 1<<' '
	// Don't skip comments: we need to count newlines.
	s.Mode = scanner.ScanChars |
		scanner.ScanFloats |
		scanner.ScanIdents |
		scanner.ScanInts |
		scanner.ScanStrings |
		scanner.ScanComments
	s.Position.Filename = name
	s.IsIdentRune = isIdentRune
	return &Tokenizer{
		s:    &s,
		base: src.NewFileBase(name, objabi.AbsFile(objabi.WorkingDir(), name, *flags.TrimPath)),
		line: 1,
		file: file,
	}
}

// We want center dot (·) and division slash (∕) to work as identifier characters.
func isIdentRune(ch rune, i int) bool {
	if unicode.IsLetter(ch) {
		return true
	}
	switch ch {
	case '_': // Underscore; traditional.
		return true
	case '\u00B7': // Represents the period in runtime.exit. U+00B7 '·' middle dot
		return true
	case '\u2215': // Represents the slash in runtime/debug.setGCPercent. U+2215 '∕' division slash
		return true
	}
	// Digits are OK only after the first character.
	return i > 0 && unicode.IsDigit(ch)
}

// Text returns the original spelling of the most recent token, writing out
// the two-character operators that Next folds into single tokens.
func (t *Tokenizer) Text() string {
	switch t.tok {
	case LSH:
		return "<<"
	case RSH:
		return ">>"
	case ARR:
		return "->"
	case ROT:
		return "@>"
	}
	return t.s.TokenText()
}

func (t *Tokenizer) File() string {
	return t.base.Filename()
}

func (t *Tokenizer) Base() *src.PosBase {
	return t.base
}

func (t *Tokenizer) SetBase(base *src.PosBase) {
	t.base = base
}

func (t *Tokenizer) Line() int {
	return t.line
}

func (t *Tokenizer) Col() int {
	return t.s.Pos().Column
}

// Next scans and returns the next token. It skips ordinary comments (while
// counting the newlines they contain, and reporting //go:build lines as
// BuildComment) and recognizes the two-character operators <<, >>, -> and @>
// as the single tokens LSH, RSH, ARR and ROT.
func (t *Tokenizer) Next() ScanToken {
	s := t.s
	for {
		t.tok = ScanToken(s.Scan())
		if t.tok != scanner.Comment {
			break
		}
		text := s.TokenText()
		t.line += strings.Count(text, "\n")
		if constraint.IsGoBuild(text) {
			t.tok = BuildComment
			break
		}
	}
	switch t.tok {
	case '\n':
		t.line++
	case '-':
		if s.Peek() == '>' {
			s.Next()
			t.tok = ARR
			return ARR
		}
	case '@':
		if s.Peek() == '>' {
			s.Next()
			t.tok = ROT
			return ROT
		}
	case '<':
		if s.Peek() == '<' {
			s.Next()
			t.tok = LSH
			return LSH
		}
	case '>':
		if s.Peek() == '>' {
			s.Next()
			t.tok = RSH
			return RSH
		}
	}
	return t.tok
}

// Close releases the underlying file, if one was supplied to NewTokenizer.
func (t *Tokenizer) Close() {
	if t.file != nil {
		t.file.Close()
	}
}
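
// A minimal usage sketch (illustrative only, not part of the upstream file):
// open an assembly source, drive the Tokenizer to EOF, and print each token
// with its position. The file name "prog.s" is hypothetical, and the snippet
// assumes "fmt" and "log" are imported alongside the packages above. Passing
// f twice lets the Tokenizer both read the file and close it later; Next
// returns scanner.EOF when input is exhausted.
//
//	f, err := os.Open("prog.s")
//	if err != nil {
//		log.Fatal(err)
//	}
//	t := NewTokenizer("prog.s", f, f)
//	defer t.Close()
//	for tok := t.Next(); tok != scanner.EOF; tok = t.Next() {
//		fmt.Printf("%s:%d:%d\t%q\n", t.File(), t.Line(), t.Col(), t.Text())
//	}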