github.com/euank/go@v0.0.0-20160829210321-495514729181/src/cmd/compile/internal/syntax/source.go (about) 1 // Copyright 2016 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package syntax 6 7 import ( 8 "fmt" 9 "io" 10 "unicode/utf8" 11 ) 12 13 // buf [...read...|...|...unread...|s|...free...] 14 // ^ ^ ^ ^ 15 // | | | | 16 // suf r0 r w 17 18 type source struct { 19 src io.Reader 20 errh ErrorHandler 21 22 // source buffer 23 buf [4 << 10]byte 24 offs int // source offset of buf 25 r0, r, w int // previous/current read and write buf positions, excluding sentinel 26 line0, line int // previous/current line 27 err error // pending io error 28 29 // literal buffer 30 lit []byte // literal prefix 31 suf int // literal suffix; suf >= 0 means we are scanning a literal 32 } 33 34 func (s *source) init(src io.Reader, errh ErrorHandler) { 35 s.src = src 36 s.errh = errh 37 38 s.buf[0] = utf8.RuneSelf // terminate with sentinel 39 s.offs = 0 40 s.r0, s.r, s.w = 0, 0, 0 41 s.line0, s.line = 1, 1 42 s.err = nil 43 44 s.lit = s.lit[:0] 45 s.suf = -1 46 } 47 48 func (s *source) error(msg string) { 49 s.error_at(s.pos0(), s.line0, msg) 50 } 51 52 func (s *source) error_at(pos, line int, msg string) { 53 if s.errh != nil { 54 s.errh(pos, line, msg) 55 return 56 } 57 panic(fmt.Sprintf("%d: %s", line, msg)) 58 } 59 60 // pos0 returns the byte position of the last character read. 61 func (s *source) pos0() int { 62 return s.offs + s.r0 63 } 64 65 func (s *source) ungetr() { 66 s.r, s.line = s.r0, s.line0 67 } 68 69 func (s *source) getr() rune { 70 redo: 71 s.r0, s.line0 = s.r, s.line 72 73 // We could avoid at least one test that is always taken in the 74 // for loop below by duplicating the common case code (ASCII) 75 // here since we always have at least the sentinel (utf8.RuneSelf) 76 // in the buffer. Measure and optimize if necessary. 77 78 // make sure we have at least one rune in buffer, or we are at EOF 79 for s.r+utf8.UTFMax > s.w && !utf8.FullRune(s.buf[s.r:s.w]) && s.err == nil && s.w-s.r < len(s.buf) { 80 s.fill() // s.w-s.r < len(s.buf) => buffer is not full 81 } 82 83 // common case: ASCII and enough bytes 84 // (invariant: s.buf[s.w] == utf8.RuneSelf) 85 if b := s.buf[s.r]; b < utf8.RuneSelf { 86 s.r++ 87 if b == 0 { 88 s.error("invalid NUL character") 89 goto redo 90 } 91 if b == '\n' { 92 s.line++ 93 } 94 return rune(b) 95 } 96 97 // EOF 98 if s.r == s.w { 99 if s.err != io.EOF { 100 s.error(s.err.Error()) 101 } 102 return -1 103 } 104 105 // uncommon case: not ASCII 106 r, w := utf8.DecodeRune(s.buf[s.r:s.w]) 107 s.r += w 108 109 if r == utf8.RuneError && w == 1 { 110 s.error("invalid UTF-8 encoding") 111 goto redo 112 } 113 114 // BOM's are only allowed as the first character in a file 115 const BOM = 0xfeff 116 if r == BOM { 117 if s.r0 > 0 { // s.r0 is always > 0 after 1st character (fill will set it to 1) 118 s.error("invalid BOM in the middle of the file") 119 } 120 goto redo 121 } 122 123 return r 124 } 125 126 func (s *source) fill() { 127 // Slide unread bytes to beginning but preserve last read char 128 // (for one ungetr call) plus one extra byte (for a 2nd ungetr 129 // call, only for ".." character sequence and float literals 130 // starting with "."). 131 if s.r0 > 1 { 132 // save literal prefix, if any 133 // (We see at most one ungetr call while reading 134 // a literal, so make sure s.r0 remains in buf.) 135 if s.suf >= 0 { 136 s.lit = append(s.lit, s.buf[s.suf:s.r0]...) 137 s.suf = 1 // == s.r0 after slide below 138 } 139 s.offs += s.r0 - 1 140 r := s.r - s.r0 + 1 // last read char plus one byte 141 s.w = r + copy(s.buf[r:], s.buf[s.r:s.w]) 142 s.r = r 143 s.r0 = 1 144 } 145 146 // read more data: try a limited number of times 147 for i := 100; i > 0; i-- { 148 n, err := s.src.Read(s.buf[s.w : len(s.buf)-1]) // -1 to leave space for sentinel 149 if n < 0 { 150 panic("negative read") // incorrect underlying io.Reader implementation 151 } 152 s.w += n 153 if n > 0 || err != nil { 154 s.buf[s.w] = utf8.RuneSelf // sentinel 155 if err != nil { 156 s.err = err 157 } 158 return 159 } 160 } 161 162 s.err = io.ErrNoProgress 163 } 164 165 func (s *source) startLit() { 166 s.suf = s.r0 167 s.lit = s.lit[:0] // reuse lit 168 } 169 170 func (s *source) stopLit() []byte { 171 lit := s.buf[s.suf:s.r] 172 if len(s.lit) > 0 { 173 lit = append(s.lit, lit...) 174 } 175 s.suf = -1 // no pending literal 176 return lit 177 }