rsc.io/go@v0.0.0-20150416155037-e040fd465409/src/cmd/asm/internal/lex/tokenizer.go (about) 1 // Copyright 2015 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package lex 6 7 import ( 8 "io" 9 "os" 10 "strings" 11 "text/scanner" 12 "unicode" 13 14 "cmd/internal/obj" 15 ) 16 17 // A Tokenizer is a simple wrapping of text/scanner.Scanner, configured 18 // for our purposes and made a TokenReader. It forms the lowest level, 19 // turning text from readers into tokens. 20 type Tokenizer struct { 21 tok ScanToken 22 s *scanner.Scanner 23 line int 24 fileName string 25 file *os.File // If non-nil, file descriptor to close. 26 } 27 28 func NewTokenizer(name string, r io.Reader, file *os.File) *Tokenizer { 29 var s scanner.Scanner 30 s.Init(r) 31 // Newline is like a semicolon; other space characters are fine. 32 s.Whitespace = 1<<'\t' | 1<<'\r' | 1<<' ' 33 // Don't skip comments: we need to count newlines. 34 s.Mode = scanner.ScanChars | 35 scanner.ScanFloats | 36 scanner.ScanIdents | 37 scanner.ScanInts | 38 scanner.ScanStrings | 39 scanner.ScanComments 40 s.Position.Filename = name 41 s.IsIdentRune = isIdentRune 42 if file != nil { 43 obj.Linklinehist(linkCtxt, histLine, name, 0) 44 } 45 return &Tokenizer{ 46 s: &s, 47 line: 1, 48 fileName: name, 49 file: file, 50 } 51 } 52 53 // We want center dot (·) and division slash (∕) to work as identifier characters. 54 func isIdentRune(ch rune, i int) bool { 55 if unicode.IsLetter(ch) { 56 return true 57 } 58 switch ch { 59 case '_': // Underscore; traditional. 60 return true 61 case '\u00B7': // Represents the period in runtime.exit. U+00B7 '·' middle dot 62 return true 63 case '\u2215': // Represents the slash in runtime/debug.setGCPercent. U+2215 '∕' division slash 64 return true 65 } 66 // Digits are OK only after the first character. 67 return i > 0 && unicode.IsDigit(ch) 68 } 69 70 func (t *Tokenizer) Text() string { 71 switch t.tok { 72 case LSH: 73 return "<<" 74 case RSH: 75 return ">>" 76 case ARR: 77 return "->" 78 case ROT: 79 return "@>" 80 } 81 return t.s.TokenText() 82 } 83 84 func (t *Tokenizer) File() string { 85 return t.fileName 86 } 87 88 func (t *Tokenizer) Line() int { 89 return t.line 90 } 91 92 func (t *Tokenizer) Col() int { 93 return t.s.Pos().Column 94 } 95 96 func (t *Tokenizer) SetPos(line int, file string) { 97 t.line = line 98 t.fileName = file 99 } 100 101 func (t *Tokenizer) Next() ScanToken { 102 s := t.s 103 for { 104 t.tok = ScanToken(s.Scan()) 105 if t.tok != scanner.Comment { 106 break 107 } 108 length := strings.Count(s.TokenText(), "\n") 109 t.line += length 110 histLine += length 111 // TODO: If we ever have //go: comments in assembly, will need to keep them here. 112 // For now, just discard all comments. 113 } 114 switch t.tok { 115 case '\n': 116 if t.file != nil { 117 histLine++ 118 } 119 t.line++ 120 case '-': 121 if s.Peek() == '>' { 122 s.Next() 123 t.tok = ARR 124 return ARR 125 } 126 case '@': 127 if s.Peek() == '>' { 128 s.Next() 129 t.tok = ROT 130 return ROT 131 } 132 case '<': 133 if s.Peek() == '<' { 134 s.Next() 135 t.tok = LSH 136 return LSH 137 } 138 case '>': 139 if s.Peek() == '>' { 140 s.Next() 141 t.tok = RSH 142 return RSH 143 } 144 } 145 return t.tok 146 } 147 148 func (t *Tokenizer) Close() { 149 if t.file != nil { 150 t.file.Close() 151 // It's an open file, so pop the line history. 152 obj.Linklinehist(linkCtxt, histLine, "<pop>", 0) 153 } 154 }