github.com/gagliardetto/golang-go@v0.0.0-20201020153340-53909ea70814/cmd/compile/internal/syntax/source.go (about) 1 // Copyright 2016 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // This file implements source, a buffered rune reader 6 // which is specialized for the needs of the Go scanner: 7 // Contiguous sequences of runes (literals) are extracted 8 // directly as []byte without the need to re-encode the 9 // runes in UTF-8 (as would be necessary with bufio.Reader). 10 // 11 // This file is self-contained (go tool compile source.go 12 // compiles) and thus could be made into its own package. 13 14 package syntax 15 16 import ( 17 "io" 18 "unicode/utf8" 19 ) 20 21 // starting points for line and column numbers 22 const linebase = 1 23 const colbase = 1 24 25 // max. number of bytes to unread 26 const maxunread = 10 27 28 // buf [...read...|...|...unread...|s|...free...] 29 // ^ ^ ^ ^ 30 // | | | | 31 // suf r0 r w 32 33 type source struct { 34 src io.Reader 35 errh func(line, pos uint, msg string) 36 37 // source buffer 38 buf [4 << 10]byte 39 r0, r, w int // previous/current read and write buf positions, excluding sentinel 40 line0, line uint // previous/current line 41 col0, col uint // previous/current column (byte offsets from line start) 42 ioerr error // pending io error 43 44 // literal buffer 45 lit []byte // literal prefix 46 suf int // literal suffix; suf >= 0 means we are scanning a literal 47 } 48 49 // init initializes source to read from src and to report errors via errh. 50 // errh must not be nil. 51 func (s *source) init(src io.Reader, errh func(line, pos uint, msg string)) { 52 s.src = src 53 s.errh = errh 54 55 s.buf[0] = utf8.RuneSelf // terminate with sentinel 56 s.r0, s.r, s.w = 0, 0, 0 57 s.line0, s.line = 0, linebase 58 s.col0, s.col = 0, colbase 59 s.ioerr = nil 60 61 s.lit = s.lit[:0] 62 s.suf = -1 63 } 64 65 // ungetr sets the reading position to a previous reading 66 // position, usually the one of the most recently read 67 // rune, but possibly earlier (see unread below). 68 func (s *source) ungetr() { 69 s.r, s.line, s.col = s.r0, s.line0, s.col0 70 } 71 72 // unread moves the previous reading position to a position 73 // that is n bytes earlier in the source. The next ungetr 74 // call will set the reading position to that moved position. 75 // The "unread" runes must be single byte and not contain any 76 // newlines; and 0 <= n <= maxunread must hold. 77 func (s *source) unread(n int) { 78 s.r0 -= n 79 s.col0 -= uint(n) 80 } 81 82 func (s *source) error(msg string) { 83 s.errh(s.line0, s.col0, msg) 84 } 85 86 // getr reads and returns the next rune. 87 // 88 // If a read or source encoding error occurs, getr 89 // calls the error handler installed with init. 90 // The handler must exist. 91 // 92 // The (line, col) position passed to the error handler 93 // is always at the current source reading position. 94 func (s *source) getr() rune { 95 redo: 96 s.r0, s.line0, s.col0 = s.r, s.line, s.col 97 98 // We could avoid at least one test that is always taken in the 99 // for loop below by duplicating the common case code (ASCII) 100 // here since we always have at least the sentinel (utf8.RuneSelf) 101 // in the buffer. Measure and optimize if necessary. 102 103 // make sure we have at least one rune in buffer, or we are at EOF 104 for s.r+utf8.UTFMax > s.w && !utf8.FullRune(s.buf[s.r:s.w]) && s.ioerr == nil && s.w-s.r < len(s.buf) { 105 s.fill() // s.w-s.r < len(s.buf) => buffer is not full 106 } 107 108 // common case: ASCII and enough bytes 109 // (invariant: s.buf[s.w] == utf8.RuneSelf) 110 if b := s.buf[s.r]; b < utf8.RuneSelf { 111 s.r++ 112 // TODO(gri) Optimization: Instead of adjusting s.col for each character, 113 // remember the line offset instead and then compute the offset as needed 114 // (which is less often). 115 s.col++ 116 if b == 0 { 117 s.error("invalid NUL character") 118 goto redo 119 } 120 if b == '\n' { 121 s.line++ 122 s.col = colbase 123 } 124 return rune(b) 125 } 126 127 // EOF 128 if s.r == s.w { 129 if s.ioerr != io.EOF { 130 // ensure we never start with a '/' (e.g., rooted path) in the error message 131 s.error("I/O error: " + s.ioerr.Error()) 132 } 133 return -1 134 } 135 136 // uncommon case: not ASCII 137 r, w := utf8.DecodeRune(s.buf[s.r:s.w]) 138 s.r += w 139 s.col += uint(w) 140 141 if r == utf8.RuneError && w == 1 { 142 s.error("invalid UTF-8 encoding") 143 goto redo 144 } 145 146 // BOM's are only allowed as the first character in a file 147 const BOM = 0xfeff 148 if r == BOM { 149 if s.r0 > 0 { // s.r0 is always > 0 after 1st character (fill will set it to maxunread) 150 s.error("invalid BOM in the middle of the file") 151 } 152 goto redo 153 } 154 155 return r 156 } 157 158 func (s *source) fill() { 159 // Slide unread bytes to beginning but preserve last read char 160 // (for one ungetr call) plus maxunread extra bytes (for one 161 // unread call). 162 if s.r0 > maxunread { 163 n := s.r0 - maxunread // number of bytes to slide down 164 // save literal prefix, if any 165 // (make sure we keep maxunread bytes and the last 166 // read char in the buffer) 167 if s.suf >= 0 { 168 // we have a literal 169 if s.suf < n { 170 // save literal prefix 171 s.lit = append(s.lit, s.buf[s.suf:n]...) 172 s.suf = 0 173 } else { 174 s.suf -= n 175 } 176 } 177 copy(s.buf[:], s.buf[n:s.w]) 178 s.r0 = maxunread // eqv: s.r0 -= n 179 s.r -= n 180 s.w -= n 181 } 182 183 // read more data: try a limited number of times 184 for i := 100; i > 0; i-- { 185 n, err := s.src.Read(s.buf[s.w : len(s.buf)-1]) // -1 to leave space for sentinel 186 if n < 0 { 187 panic("negative read") // incorrect underlying io.Reader implementation 188 } 189 s.w += n 190 if n > 0 || err != nil { 191 s.buf[s.w] = utf8.RuneSelf // sentinel 192 if err != nil { 193 s.ioerr = err 194 } 195 return 196 } 197 } 198 199 s.buf[s.w] = utf8.RuneSelf // sentinel 200 s.ioerr = io.ErrNoProgress 201 } 202 203 func (s *source) startLit() { 204 s.suf = s.r0 205 s.lit = s.lit[:0] // reuse lit 206 } 207 208 func (s *source) stopLit() []byte { 209 lit := s.buf[s.suf:s.r] 210 if len(s.lit) > 0 { 211 lit = append(s.lit, lit...) 212 } 213 s.killLit() 214 return lit 215 } 216 217 func (s *source) killLit() { 218 s.suf = -1 // no pending literal 219 }