github.com/megatontech/mynoteforgo@v0.0.0-20200507084910-5d0c6ea6e890/源码/cmd/compile/internal/syntax/source.go (about) 1 // Copyright 2016 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // This file implements source, a buffered rune reader 6 // which is specialized for the needs of the Go scanner: 7 // Contiguous sequences of runes (literals) are extracted 8 // directly as []byte without the need to re-encode the 9 // runes in UTF-8 (as would be necessary with bufio.Reader). 10 // 11 // This file is self-contained (go tool compile source.go 12 // compiles) and thus could be made into its own package. 13 14 package syntax 15 16 import ( 17 "io" 18 "unicode/utf8" 19 ) 20 21 // starting points for line and column numbers 22 const linebase = 1 23 const colbase = 1 24 25 // buf [...read...|...|...unread...|s|...free...] 26 // ^ ^ ^ ^ 27 // | | | | 28 // suf r0 r w 29 30 type source struct { 31 src io.Reader 32 errh func(line, pos uint, msg string) 33 34 // source buffer 35 buf [4 << 10]byte 36 r0, r, w int // previous/current read and write buf positions, excluding sentinel 37 line0, line uint // previous/current line 38 col0, col uint // previous/current column (byte offsets from line start) 39 ioerr error // pending io error 40 41 // literal buffer 42 lit []byte // literal prefix 43 suf int // literal suffix; suf >= 0 means we are scanning a literal 44 } 45 46 // init initializes source to read from src and to report errors via errh. 47 // errh must not be nil. 48 func (s *source) init(src io.Reader, errh func(line, pos uint, msg string)) { 49 s.src = src 50 s.errh = errh 51 52 s.buf[0] = utf8.RuneSelf // terminate with sentinel 53 s.r0, s.r, s.w = 0, 0, 0 54 s.line0, s.line = 0, linebase 55 s.col0, s.col = 0, colbase 56 s.ioerr = nil 57 58 s.lit = s.lit[:0] 59 s.suf = -1 60 } 61 62 // ungetr ungets the most recently read rune. 63 func (s *source) ungetr() { 64 s.r, s.line, s.col = s.r0, s.line0, s.col0 65 } 66 67 // ungetr2 is like ungetr but enables a 2nd ungetr. 68 // It must not be called if one of the runes seen 69 // was a newline or had a UTF-8 encoding longer than 70 // 1 byte. 71 func (s *source) ungetr2() { 72 s.ungetr() 73 // line must not have changed 74 s.r0-- 75 s.col0-- 76 } 77 78 func (s *source) error(msg string) { 79 s.errh(s.line0, s.col0, msg) 80 } 81 82 // getr reads and returns the next rune. 83 // 84 // If a read or source encoding error occurs, getr 85 // calls the error handler installed with init. 86 // The handler must exist. 87 // 88 // The (line, col) position passed to the error handler 89 // is always at the current source reading position. 90 func (s *source) getr() rune { 91 redo: 92 s.r0, s.line0, s.col0 = s.r, s.line, s.col 93 94 // We could avoid at least one test that is always taken in the 95 // for loop below by duplicating the common case code (ASCII) 96 // here since we always have at least the sentinel (utf8.RuneSelf) 97 // in the buffer. Measure and optimize if necessary. 98 99 // make sure we have at least one rune in buffer, or we are at EOF 100 for s.r+utf8.UTFMax > s.w && !utf8.FullRune(s.buf[s.r:s.w]) && s.ioerr == nil && s.w-s.r < len(s.buf) { 101 s.fill() // s.w-s.r < len(s.buf) => buffer is not full 102 } 103 104 // common case: ASCII and enough bytes 105 // (invariant: s.buf[s.w] == utf8.RuneSelf) 106 if b := s.buf[s.r]; b < utf8.RuneSelf { 107 s.r++ 108 // TODO(gri) Optimization: Instead of adjusting s.col for each character, 109 // remember the line offset instead and then compute the offset as needed 110 // (which is less often). 111 s.col++ 112 if b == 0 { 113 s.error("invalid NUL character") 114 goto redo 115 } 116 if b == '\n' { 117 s.line++ 118 s.col = colbase 119 } 120 return rune(b) 121 } 122 123 // EOF 124 if s.r == s.w { 125 if s.ioerr != io.EOF { 126 // ensure we never start with a '/' (e.g., rooted path) in the error message 127 s.error("I/O error: " + s.ioerr.Error()) 128 } 129 return -1 130 } 131 132 // uncommon case: not ASCII 133 r, w := utf8.DecodeRune(s.buf[s.r:s.w]) 134 s.r += w 135 s.col += uint(w) 136 137 if r == utf8.RuneError && w == 1 { 138 s.error("invalid UTF-8 encoding") 139 goto redo 140 } 141 142 // BOM's are only allowed as the first character in a file 143 const BOM = 0xfeff 144 if r == BOM { 145 if s.r0 > 0 { // s.r0 is always > 0 after 1st character (fill will set it to 1) 146 s.error("invalid BOM in the middle of the file") 147 } 148 goto redo 149 } 150 151 return r 152 } 153 154 func (s *source) fill() { 155 // Slide unread bytes to beginning but preserve last read char 156 // (for one ungetr call) plus one extra byte (for a 2nd ungetr 157 // call, only for ".." character sequence and float literals 158 // starting with "."). 159 if s.r0 > 1 { 160 // save literal prefix, if any 161 // (We see at most one ungetr call while reading 162 // a literal, so make sure s.r0 remains in buf.) 163 if s.suf >= 0 { 164 s.lit = append(s.lit, s.buf[s.suf:s.r0]...) 165 s.suf = 1 // == s.r0 after slide below 166 } 167 n := s.r0 - 1 168 copy(s.buf[:], s.buf[n:s.w]) 169 s.r0 = 1 // eqv: s.r0 -= n 170 s.r -= n 171 s.w -= n 172 } 173 174 // read more data: try a limited number of times 175 for i := 100; i > 0; i-- { 176 n, err := s.src.Read(s.buf[s.w : len(s.buf)-1]) // -1 to leave space for sentinel 177 if n < 0 { 178 panic("negative read") // incorrect underlying io.Reader implementation 179 } 180 s.w += n 181 if n > 0 || err != nil { 182 s.buf[s.w] = utf8.RuneSelf // sentinel 183 if err != nil { 184 s.ioerr = err 185 } 186 return 187 } 188 } 189 190 s.buf[s.w] = utf8.RuneSelf // sentinel 191 s.ioerr = io.ErrNoProgress 192 } 193 194 func (s *source) startLit() { 195 s.suf = s.r0 196 s.lit = s.lit[:0] // reuse lit 197 } 198 199 func (s *source) stopLit() []byte { 200 lit := s.buf[s.suf:s.r] 201 if len(s.lit) > 0 { 202 lit = append(s.lit, lit...) 203 } 204 s.killLit() 205 return lit 206 } 207 208 func (s *source) killLit() { 209 s.suf = -1 // no pending literal 210 }