github.com/FenixAra/go@v0.0.0-20170127160404-96ea0918e670/src/cmd/compile/internal/syntax/source.go (about)

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package syntax
     6  
     7  import (
     8  	"io"
     9  	"unicode/utf8"
    10  )
    11  
    12  // buf [...read...|...|...unread...|s|...free...]
    13  //         ^      ^   ^            ^
    14  //         |      |   |            |
    15  //        suf     r0  r            w
    16  
    17  type source struct {
    18  	src   io.Reader
    19  	errh  ErrorHandler
    20  	first error // first error encountered
    21  
    22  	// source buffer
    23  	buf         [4 << 10]byte
    24  	offs        int   // source offset of buf
    25  	r0, r, w    int   // previous/current read and write buf positions, excluding sentinel
    26  	line0, line int   // previous/current line
    27  	err         error // pending io error
    28  
    29  	// literal buffer
    30  	lit []byte // literal prefix
    31  	suf int    // literal suffix; suf >= 0 means we are scanning a literal
    32  }
    33  
    34  func (s *source) init(src io.Reader, errh ErrorHandler) {
    35  	s.src = src
    36  	s.errh = errh
    37  	s.first = nil
    38  
    39  	s.buf[0] = utf8.RuneSelf // terminate with sentinel
    40  	s.offs = 0
    41  	s.r0, s.r, s.w = 0, 0, 0
    42  	s.line0, s.line = 1, 1
    43  	s.err = nil
    44  
    45  	s.lit = s.lit[:0]
    46  	s.suf = -1
    47  }
    48  
    49  func (s *source) error(msg string) {
    50  	s.error_at(s.pos0(), s.line0, msg)
    51  }
    52  
    53  func (s *source) error_at(pos, line int, msg string) {
    54  	err := Error{pos, line, msg}
    55  	if s.first == nil {
    56  		s.first = err
    57  	}
    58  	if s.errh == nil {
    59  		panic(s.first)
    60  	}
    61  	s.errh(err)
    62  }
    63  
    64  // pos0 returns the byte position of the last character read.
    65  func (s *source) pos0() int {
    66  	return s.offs + s.r0
    67  }
    68  
    69  func (s *source) ungetr() {
    70  	s.r, s.line = s.r0, s.line0
    71  }
    72  
    73  func (s *source) getr() rune {
    74  redo:
    75  	s.r0, s.line0 = s.r, s.line
    76  
    77  	// We could avoid at least one test that is always taken in the
    78  	// for loop below by duplicating the common case code (ASCII)
    79  	// here since we always have at least the sentinel (utf8.RuneSelf)
    80  	// in the buffer. Measure and optimize if necessary.
    81  
    82  	// make sure we have at least one rune in buffer, or we are at EOF
    83  	for s.r+utf8.UTFMax > s.w && !utf8.FullRune(s.buf[s.r:s.w]) && s.err == nil && s.w-s.r < len(s.buf) {
    84  		s.fill() // s.w-s.r < len(s.buf) => buffer is not full
    85  	}
    86  
    87  	// common case: ASCII and enough bytes
    88  	// (invariant: s.buf[s.w] == utf8.RuneSelf)
    89  	if b := s.buf[s.r]; b < utf8.RuneSelf {
    90  		s.r++
    91  		if b == 0 {
    92  			s.error("invalid NUL character")
    93  			goto redo
    94  		}
    95  		if b == '\n' {
    96  			s.line++
    97  		}
    98  		return rune(b)
    99  	}
   100  
   101  	// EOF
   102  	if s.r == s.w {
   103  		if s.err != io.EOF {
   104  			s.error(s.err.Error())
   105  		}
   106  		return -1
   107  	}
   108  
   109  	// uncommon case: not ASCII
   110  	r, w := utf8.DecodeRune(s.buf[s.r:s.w])
   111  	s.r += w
   112  
   113  	if r == utf8.RuneError && w == 1 {
   114  		s.error("invalid UTF-8 encoding")
   115  		goto redo
   116  	}
   117  
   118  	// BOM's are only allowed as the first character in a file
   119  	const BOM = 0xfeff
   120  	if r == BOM {
   121  		if s.r0 > 0 { // s.r0 is always > 0 after 1st character (fill will set it to 1)
   122  			s.error("invalid BOM in the middle of the file")
   123  		}
   124  		goto redo
   125  	}
   126  
   127  	return r
   128  }
   129  
   130  func (s *source) fill() {
   131  	// Slide unread bytes to beginning but preserve last read char
   132  	// (for one ungetr call) plus one extra byte (for a 2nd ungetr
   133  	// call, only for ".." character sequence and float literals
   134  	// starting with ".").
   135  	if s.r0 > 1 {
   136  		// save literal prefix, if any
   137  		// (We see at most one ungetr call while reading
   138  		// a literal, so make sure s.r0 remains in buf.)
   139  		if s.suf >= 0 {
   140  			s.lit = append(s.lit, s.buf[s.suf:s.r0]...)
   141  			s.suf = 1 // == s.r0 after slide below
   142  		}
   143  		s.offs += s.r0 - 1
   144  		r := s.r - s.r0 + 1 // last read char plus one byte
   145  		s.w = r + copy(s.buf[r:], s.buf[s.r:s.w])
   146  		s.r = r
   147  		s.r0 = 1
   148  	}
   149  
   150  	// read more data: try a limited number of times
   151  	for i := 100; i > 0; i-- {
   152  		n, err := s.src.Read(s.buf[s.w : len(s.buf)-1]) // -1 to leave space for sentinel
   153  		if n < 0 {
   154  			panic("negative read") // incorrect underlying io.Reader implementation
   155  		}
   156  		s.w += n
   157  		if n > 0 || err != nil {
   158  			s.buf[s.w] = utf8.RuneSelf // sentinel
   159  			if err != nil {
   160  				s.err = err
   161  			}
   162  			return
   163  		}
   164  	}
   165  
   166  	s.err = io.ErrNoProgress
   167  }
   168  
   169  func (s *source) startLit() {
   170  	s.suf = s.r0
   171  	s.lit = s.lit[:0] // reuse lit
   172  }
   173  
   174  func (s *source) stopLit() []byte {
   175  	lit := s.buf[s.suf:s.r]
   176  	if len(s.lit) > 0 {
   177  		lit = append(s.lit, lit...)
   178  	}
   179  	s.suf = -1 // no pending literal
   180  	return lit
   181  }